/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2014 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
29 #include "print-tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
47 #include "diagnostic-core.h"
48 #include "basic-block.h"
51 #include "target-def.h"
53 #include "langhooks.h"
55 #include "hash-table.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
62 #include "gimple-expr.h"
70 #include "tree-pass.h"
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

const struct processor_costs *s390_cost;
static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;

/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
          base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
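/* A minimal sketch of that structure, inferred from the field accesses in
   s390_decompose_address and its callers below (out->base, out->indx,
   out->disp, out->pointer, out->literal_pool, addr.indx, addr.disp):  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};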
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Number of the first and last gpr for which slots in the register
     save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
       0 - does not need to be saved at all
      -1 - stack slot  */
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     the FPRs.  */
  bool tbegin_p;
};

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
  ? cfun_frame_layout.fpr_bitmap & 0x0f \
  : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
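/* Worked example for the two fpr_bitmap accessors above: after
   cfun_set_fpr_save (FPR0_REGNUM + 2), bit 2 of fpr_bitmap is set; per the
   bit/fpr/reg table in struct s390_frame_layout, bit 2 stands for f4
   (hard reg 18), and cfun_fpr_save_p (FPR0_REGNUM + 2) then returns true.  */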
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

static const int s390_hotpatch_trampoline_halfwords_default = 12;
static const int s390_hotpatch_trampoline_halfwords_max = 1000000;
static int s390_hotpatch_trampoline_halfwords = -1;
/* Return the argument of the given hotpatch attribute or the default value if
   no argument is present.  */

static inline int
get_hotpatch_attribute (tree hotpatch_attr)
{
  const_tree args;

  args = TREE_VALUE (hotpatch_attr);

  return (args) ?
    TREE_INT_CST_LOW (TREE_VALUE (args)):
    s390_hotpatch_trampoline_halfwords_default;
}

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  else if (args)
    {
      tree expr = TREE_VALUE (args);

      if (TREE_CODE (expr) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	  || wi::gtu_p (expr, s390_hotpatch_trampoline_halfwords_max))
	{
	  error ("requested %qE attribute is not a non-negative integer"
		 " constant or too large (max. %d)", name,
		 s390_hotpatch_trampoline_halfwords_max);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 0, 1, true, false, false, s390_handle_hotpatch_attribute, false
  },
  /* End element.  */
  { NULL,        0, 0, false, false, false, NULL, false }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx label)
{
  rtx prev_insn = prev_active_insn (label);

  if (prev_insn == NULL_RTX)
    goto old;

  prev_insn = single_set (prev_insn);

  if (prev_insn == NULL_RTX)
    goto old;

  prev_insn = SET_SRC (prev_insn);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (prev_insn) == UNSPEC
      && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (enum machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on
     31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case CCSmode:
    case CCUmode:
    case CCTmode:
    case CCSRmode:
    case CCURmode:
    case CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
{
  enum machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:  case CCSRmode:
    case CCUmode:  case CCURmode:
    case CCLmode:  case CCL1mode:
    case CCL2mode: case CCL3mode:
    case CCT1mode: case CCT2mode: case CCT3mode:
      if (req_mode != set_mode)
	return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode)
	return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, enum machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

enum machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_DOUBLE as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16) -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

enum machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
      case EQ:
      case NE:
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	  return CCAPmode;
	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	     || GET_CODE (op1) == NEG)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCLmode;

	if (GET_CODE (op0) == AND)
	  {
	    /* Check whether we can potentially do it via TM.  */
	    enum machine_mode ccmode;
	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	    if (ccmode != VOIDmode)
	      {
		/* Relax CCTmode to CCZmode to allow fall-back to AND
		   if that turns out to be beneficial.  */
		return ccmode == CCTmode ? CCZmode : ccmode;
	      }
	  }

	if (register_operand (op0, HImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	  return CCT3mode;
	if (register_operand (op0, QImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	  return CCT3mode;

	return CCZmode;

      case LE:
      case LT:
      case GE:
      case GT:
	/* The only overflow condition of NEG and ABS happens when
	   -INT_MAX is used as parameter, which stays negative. So
	   we have an overflow from a positive value to a negative.
	   Using CCAP mode the resulting cc can be used for comparisons.  */
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;

	/* If constants are involved in an add instruction it is possible to use
	   the resulting cc for comparisons with zero. Knowing the sign of the
	   constant the overflow behavior gets predictable. e.g.:
	     int a, b; if ((b = a + c) > 0)
	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		    /* Avoid INT32_MIN on 32 bit.  */
		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	  {
	    if (INTVAL (XEXP (op0, 1)) < 0)
	      return CCANmode;
	    else
	      return CCAPmode;
	  }
	/* Fall through.  */
      case UNORDERED:
      case ORDERED:
      case UNEQ:
      case UNLE:
      case UNLT:
      case UNGE:
      case UNGT:
      case LTGT:
	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCSRmode;
	return CCSmode;

      case LTU:
      case GEU:
	if (GET_CODE (op0) == PLUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL1mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      case LEU:
      case GTU:
	if (GET_CODE (op0) == MINUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL2mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      default:
	gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }
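  /* Worked example for the ZERO_EXTRACT conversion above: in SImode
     (modesize == 32), extracting len == 2 bits at pos == 4 gives
     block = 0x3 << (32 - 4 - 2) == 0x0c000000, so the comparison becomes
     an AND against that mask, which the TM patterns can pick up.  */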
  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case CCZmode:
	case CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ;  break;
	    case NE: new_code = NE;  break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;

      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  /* Do not output a redundant compare instruction if a compare_and_swap
     pattern already computed the result and the machine modes are
     compatible.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
			    const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCT1mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC1;
	case NE:	return CC0 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCT2mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC2;
	case NE:	return CC0 | CC1 | CC3;
	default:	return -1;
	}
      break;

    case CCT3mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC3;
	case NE:	return CC0 | CC1 | CC2;
	default:	return -1;
	}
      break;

    case CCLmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0 | CC2;
	case NE:	return CC1 | CC3;
	default:	return -1;
	}
      break;

    case CCL1mode:
      switch (GET_CODE (code))
	{
	case LTU:	return CC2 | CC3;  /* carry */
	case GEU:	return CC0 | CC1;  /* no carry */
	default:	return -1;
	}
      break;

    case CCL2mode:
      switch (GET_CODE (code))
	{
	case GTU:	return CC0 | CC1;  /* borrow */
	case LEU:	return CC2 | CC3;  /* no borrow */
	default:	return -1;
	}
      break;

    case CCL3mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0 | CC2;
	case NE:	return CC1 | CC3;
	case LTU:	return CC1;
	case GTU:	return CC3;
	case LEU:	return CC1 | CC2;
	case GEU:	return CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCUmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LTU:	return CC1;
	case GTU:	return CC2;
	case LEU:	return CC0 | CC1;
	case GEU:	return CC0 | CC2;
	default:	return -1;
	}
      break;

    case CCURmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC2 | CC1 | CC3;
	case LTU:	return CC2;
	case GTU:	return CC1;
	case LEU:	return CC0 | CC2;
	case GEU:	return CC0 | CC1;
	default:	return -1;
	}
      break;

    case CCAPmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1 | CC3;
	case GT:	return CC2;
	case LE:	return CC0 | CC1 | CC3;
	case GE:	return CC0 | CC2;
	default:	return -1;
	}
      break;

    case CCANmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1;
	case GT:	return CC2 | CC3;
	case LE:	return CC0 | CC1;
	case GE:	return CC0 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCSmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1;
	case GT:	return CC2;
	case LE:	return CC0 | CC1;
	case GE:	return CC0 | CC2;
	case UNORDERED:	return CC3;
	case ORDERED:	return CC0 | CC1 | CC2;
	case UNEQ:	return CC0 | CC3;
	case UNLT:	return CC1 | CC3;
	case UNGT:	return CC2 | CC3;
	case UNLE:	return CC0 | CC1 | CC3;
	case UNGE:	return CC0 | CC2 | CC3;
	case LTGT:	return CC1 | CC2;
	default:	return -1;
	}
      break;

    case CCSRmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC2 | CC1 | CC3;
	case LT:	return CC2;
	case GT:	return CC1;
	case LE:	return CC0 | CC2;
	case GE:	return CC0 | CC1;
	case UNORDERED:	return CC3;
	case ORDERED:	return CC0 | CC2 | CC1;
	case UNEQ:	return CC0 | CC3;
	case UNLT:	return CC2 | CC3;
	case UNGT:	return CC1 | CC3;
	case UNLE:	return CC0 | CC2 | CC3;
	case UNGE:	return CC0 | CC1 | CC3;
	case LTGT:	return CC2 | CC1;
	default:	return -1;
	}
      break;

    case CCRAWmode:
      switch (GET_CODE (code))
	{
	case EQ:
	  return INTVAL (XEXP (code, 1));
	case NE:
	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
	default:
	  return -1;
	}
      break;

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
	      && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
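/* Example: a CCUmode LEU test yields mask CC0 | CC1 == 12 from
   s390_branch_condition_mask above, and mnemonic[12] is "le"; with INV set
   the mask becomes 12 ^ 15 == 3, giving the inverted mnemonic "nle".  */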
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, enum machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT) 1 << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
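/* Illustrative use of s390_extract_part: for op == GEN_INT (0x12340000),
   mode == HImode and def == 0, the lowest 16-bit part 0x0000 equals DEF,
   so the loop continues and returns the differing part 0x1234.  */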
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  enum machine_mode mode,
		  enum machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
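/* Illustrative use of s390_single_part: for op == GEN_INT (0x12340000),
   mode == DImode, part_mode == HImode and def == 0, exactly one HImode
   part (0x1234) differs from DEF; it is found at i == 1 counting from the
   least significant part, so the function returns 4 - 1 - 1 == 2, i.e. the
   part number counting from the most significant part.  */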
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in IN.  POS and LENGTH can be used
   to obtain the start position and the length of the bitfield.

   POS gives the position of the first bit of the bitfield counting
   from the lowest order bit starting with zero.  In order to use this
   value for S/390 instructions this has to be converted to "bits big
   endian" style.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
			   int *pos, int *length)
{
  int tmp_pos = 0;
  int tmp_length = 0;
  int i;
  unsigned HOST_WIDE_INT mask = 1ULL;
  bool contiguous = false;

  for (i = 0; i < size; mask <<= 1, i++)
    {
      if (contiguous)
	{
	  if (mask & in)
	    tmp_length++;
	  else
	    break;
	}
      else
	{
	  if (mask & in)
	    {
	      contiguous = true;
	      tmp_length++;
	    }
	  else
	    tmp_pos++;
	}
    }

  if (!tmp_length)
    return false;

  /* Calculate a mask for all bits beyond the contiguous bits.  */
  mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));

  if (mask & in)
    return false;

  if (tmp_length + tmp_pos - 1 > size)
    return false;

  if (length)
    *length = tmp_length;

  if (pos)
    *pos = tmp_pos;

  return true;
}

/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int pos, len;
  bool ok;

  ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
  gcc_assert (ok);

  return ((rotl >= 0 && rotl <= pos)
	  || (rotl < 0 && -rotl <= bitsize - len - pos));
}
/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
{
  /* Floating point registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst))
    return false;

  /* We don't need to split if operands are directly accessible.  */
  if (s_operand (src, mode) || s_operand (dst, mode))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
	return false;
    }

  return true;
}
/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise.  */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block operations.
     Overlapping operations would otherwise be recognized by the S/390 hardware
     and would fall back to a slower implementation.  Allowing overlapping
     operations would lead to slow code but not to wrong code.  Therefore we are
     somewhat optimistic if we cannot prove that the memory blocks are
     overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}
/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA.  Return true if
   we can prove this to be the case, false otherwise.  */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}
/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */

void
s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
			      rtx *operands)
{
  enum machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register.  */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory.  Otherwise perform the operation in SImode.  */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required.  */
  if (wmode != mode)
    {
      if (GET_CODE (dst) == SUBREG
	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
	dst = tem;
      else if (REG_P (dst))
	dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
	dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
	src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
	src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction.  */
  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}
/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands.  After reload, it has to be
     the first source operand.  */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}
/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions.  */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
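/* Illustrative use: narrowing (AND (MEM:SI) (const_int 0xffffff00)) passes
   def == -1, so s390_single_part finds that only the lowest byte of the
   immediate differs from all-ones and returns part 3 (counting from the
   most significant byte); the MEM is then adjusted to that QImode byte and
   the operation can be emitted as a byte-sized SI-type instruction (NI)
   instead of an SS-type one.  */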
/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* Map for smallest class containing reg regno.  */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,
  ACCESS_REGS,  ACCESS_REGS
};
/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}
/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}
/* Decompose a RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise.  If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized.  */

static bool
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below.  However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register.  This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose.  */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement.  */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
	{
	  if (code1 == REG || code1 == UNSPEC)
	    {
	      indx = op0;	/* index + base */
	      base = op1;
	    }

	  else
	    {
	      base = op0;	/* base + displacement */
	      disp = op1;
	    }
	}

      else if (code0 == PLUS)
	{
	  indx = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}

      else
	{
	  return false;
	}
    }

  else
    disp = addr;		/* displacement */

  /* Extract integer part of displacement.  */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
	{
	  offset = INTVAL (disp);
	  disp = NULL_RTX;
	}
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	{
	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
	  disp = XEXP (XEXP (disp, 0), 0);
	}
    }

  /* Strip off CONST here to avoid special case tests later.  */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register.  */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      /* Either base or index must be free to hold the base register.  */
      if (!base)
	base = fake_pool_base, literal_pool = true;
      else if (!indx)
	indx = fake_pool_base, literal_pool = true;
      else
	return false;

      /* Mark up the displacement.  */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
			     UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register.  */
  if (base)
    {
      if (GET_CODE (base) == UNSPEC)
	switch (XINT (base, 1))
	  {
	  case UNSPEC_LTREF:
	    if (!disp)
	      disp = gen_rtx_UNSPEC (Pmode,
				     gen_rtvec (1, XVECEXP (base, 0, 0)),
				     UNSPEC_LTREL_OFFSET);
	    else
	      return false;

	    base = XVECEXP (base, 0, 1);
	    break;

	  case UNSPEC_LTREL_BASE:
	    if (XVECLEN (base, 0) == 1)
	      base = fake_pool_base, literal_pool = true;
	    else
	      base = XVECEXP (base, 0, 1);
	    break;

	  default:
	    return false;
	  }

      if (!REG_P (base)
	  || (GET_MODE (base) != SImode
	      && GET_MODE (base) != Pmode))
	return false;

      if (REGNO (base) == STACK_POINTER_REGNUM
	  || REGNO (base) == FRAME_POINTER_REGNUM
	  || ((reload_completed || reload_in_progress)
	      && frame_pointer_needed
	      && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
	  || REGNO (base) == ARG_POINTER_REGNUM
	  || (flag_pic
	      && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
	pointer = base_ptr = true;

      if ((reload_completed || reload_in_progress)
	  && base == cfun->machine->base_reg)
	pointer = base_ptr = literal_pool = true;
    }

  /* Validate index register.  */
  if (indx)
    {
      if (GET_CODE (indx) == UNSPEC)
	switch (XINT (indx, 1))
	  {
	  case UNSPEC_LTREF:
	    if (!disp)
	      disp = gen_rtx_UNSPEC (Pmode,
				     gen_rtvec (1, XVECEXP (indx, 0, 0)),
				     UNSPEC_LTREL_OFFSET);
	    else
	      return false;

	    indx = XVECEXP (indx, 0, 1);
	    break;

	  case UNSPEC_LTREL_BASE:
	    if (XVECLEN (indx, 0) == 1)
	      indx = fake_pool_base, literal_pool = true;
	    else
	      indx = XVECEXP (indx, 0, 1);
	    break;

	  default:
	    return false;
	  }

      if (!REG_P (indx)
	  || (GET_MODE (indx) != SImode
	      && GET_MODE (indx) != Pmode))
	return false;

      if (REGNO (indx) == STACK_POINTER_REGNUM
	  || REGNO (indx) == FRAME_POINTER_REGNUM
	  || ((reload_completed || reload_in_progress)
	      && frame_pointer_needed
	      && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
	  || REGNO (indx) == ARG_POINTER_REGNUM
	  || (flag_pic
	      && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
	pointer = indx_ptr = true;

      if ((reload_completed || reload_in_progress)
	  && indx == cfun->machine->base_reg)
	pointer = indx_ptr = literal_pool = true;
    }

  /* Prefer to use pointer as base, not index.  */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      rtx tmp = base;
      base = indx;
      indx = tmp;
    }

  /* Validate displacement.  */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
	 anyway as the virtual registers get eliminated.  This could make a
	 valid displacement invalid, but it is more likely to make an invalid
	 displacement valid, because we sometimes access the register save area
	 via negative offsets to one of those registers.
	 Thus we don't check the displacement for validity here.  If after
	 elimination the displacement turns out to be invalid after all,
	 this is fixed up by reload in any case.  */
      /* LRA maintains always displacements up to date and we need to
	 know the displacement is right during all LRA not only at the
	 final elimination.  */
      if (lra_in_progress
	  || (base != arg_pointer_rtx
	      && indx != arg_pointer_rtx
	      && base != return_address_pointer_rtx
	      && indx != return_address_pointer_rtx
	      && base != frame_pointer_rtx
	      && indx != frame_pointer_rtx
	      && base != virtual_stack_vars_rtx
	      && indx != virtual_stack_vars_rtx))
	if (!DISP_IN_RANGE (offset))
	  return false;
    }
  else
    {
      /* All the special cases are pointers.  */
      pointer = true;

      /* In the small-PIC case, the linker converts @GOT
	 and @GOTNTPOFF offsets to possible displacements.  */
      if (GET_CODE (disp) == UNSPEC
	  && (XINT (disp, 1) == UNSPEC_GOT
	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
	  && flag_pic == 1)
	{
	  ;
	}

      /* Accept pool label offsets.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
	;

      /* Accept literal pool references.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
	{
	  /* In case CSE pulled a non literal pool reference out of
	     the pool we have to reject the address.  This is
	     especially important when loading the GOT pointer on non
	     zarch CPUs.  In this case the literal pool contains an lt
	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
	     will most likely exceed the displacement.  */
	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
	    return false;

	  orig_disp = gen_rtx_CONST (Pmode, disp);
	  if (offset)
	    {
	      /* If we have an offset, make sure it does not
		 exceed the size of the constant pool entry.  */
	      rtx sym = XVECEXP (disp, 0, 0);
	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
		return false;

	      orig_disp = plus_constant (Pmode, orig_disp, offset);
	    }
	}

      else
	return false;
    }

  if (out)
    {
      out->base = base;
      out->indx = indx;
      out->disp = orig_disp;
      out->pointer = pointer;
      out->literal_pool = literal_pool;
    }

  return true;
}
/* Decompose a RTL expression OP for a shift count into its components,
   and return the base register in BASE and the offset in OFFSET.

   Return true if OP is a valid shift count, false if not.  */

bool
s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off = 0;

  /* We can have an integer constant, an address register,
     or a sum of the two.  */
  if (GET_CODE (op) == CONST_INT)
    {
      off = INTVAL (op);
      op = NULL_RTX;
    }
  if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
    {
      off = INTVAL (XEXP (op, 1));
      op = XEXP (op, 0);
    }
  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (op && GET_CODE (op) != REG)
    return false;

  if (offset)
    *offset = off;
  if (base)
    *base = op;

  return true;
}
/* Return true if CODE is a valid address without index.  */

bool
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}
/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction.  Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in REF and/or ADDEND if you are not interested in these
   values.  Literal pool references are *not* considered symbol
   references.  */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
      || (GET_CODE (addr) == UNSPEC
	  && (XINT (addr, 1) == UNSPEC_GOTENT
	      || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
    {
      if (symref)
	*symref = addr;
      if (addend)
	*addend = tmpaddend;

      return true;
    }
  return false;
}
/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
   letters are allowed for C.  */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  struct s390_address addr;
  bool decomposed = false;

  /* This check makes sure that no symbolic address (except literal
     pool references) are accepted by the R or T constraints.  */
  if (s390_loadrelative_operand_p (op, NULL, NULL))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
	return 0;
      if (addr.literal_pool)
	return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      if (!s390_short_displacement (addr.disp))
	return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
	{
	  if (!decomposed && !s390_decompose_address (op, &addr))
	    return 0;
	  if (!s390_short_displacement (addr.disp))
	    return 0;
	}
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    case 'S': /* no index long displacement */
      if (!TARGET_LONG_DISPLACEMENT)
	return 0;
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      if (s390_short_displacement (addr.disp))
	return 0;
      break;

    case 'T': /* with index long displacement */
      if (!TARGET_LONG_DISPLACEMENT)
	return 0;
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      if ((decomposed || s390_decompose_address (op, &addr))
	  && s390_short_displacement (addr.disp))
	return 0;
      break;

    default:
      return 0;
    }
  return 1;
}
/* Evaluates constraint strings described by the regular expression
   ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, or 0 else.  */

int
s390_mem_constraint (const char *str, rtx op)
{
  char c = str[0];

  switch (c)
    {
    case 'A':
      /* Check for offsettable variants of memory constraints.  */
      if (!MEM_P (op) || MEM_VOLATILE_P (op))
	return 0;
      if ((reload_completed || reload_in_progress)
	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
    case 'B':
      /* Check for non-literal-pool variants of memory constraints.  */
      if (!MEM_P (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
      if (GET_CODE (op) != MEM)
	return 0;
      return s390_check_qrst_address (c, XEXP (op, 0), true);
    case 'U':
      return (s390_check_qrst_address ('Q', op, true)
	      || s390_check_qrst_address ('R', op, true));
    case 'W':
      return (s390_check_qrst_address ('S', op, true)
	      || s390_check_qrst_address ('T', op, true));
    case 'Y':
      /* Simply check for the basic form of a shift count.  Reload will
	 take care of making sure we have a proper base register.  */
      if (!s390_decompose_shift_count (op, NULL, NULL))
	return 0;
      break;
    case 'Z':
      return s390_check_qrst_address (str[1], op, true);
    default:
      return 0;
    }
  return 1;
}
/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the second letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
   otherwise.  */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}
/* Evaluates constraint strings starting with letter N.  Parameter STR
   contains the letters following letter "N" in the constraint string.
   Returns true if VALUE matches the constraint.  */

bool
s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
{
  enum machine_mode mode, part_mode;
  int def;
  int part, part_goal;

  if (str[0] == 'x')
    part_goal = -1;
  else
    part_goal = str[0] - '0';

  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
    return 0;

  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
  if (part < 0)
    return 0;
  if (part_goal != -1 && part_goal != part)
    return 0;

  return 1;
}
/* Returns true if the input parameter VALUE is a float zero.  */

bool
s390_float_const_zero_p (rtx value)
{
  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
	  && value == CONST0_RTX (GET_MODE (value)));
}
/* Implement TARGET_REGISTER_MOVE_COST.  */

static int
s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  /* On s390, copy between fprs and gprs is expensive as long as no
     ldgr/lgdr can be used.  */
  if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
      && ((reg_classes_intersect_p (from, GENERAL_REGS)
	   && reg_classes_intersect_p (to, FP_REGS))
	  || (reg_classes_intersect_p (from, FP_REGS)
	      && reg_classes_intersect_p (to, GENERAL_REGS))))
    return 10;

  return 1;
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

static int
s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
		       reg_class_t rclass ATTRIBUTE_UNUSED,
		       bool in ATTRIBUTE_UNUSED)
{
  return 1;
}
2407 /* Compute a (partial) cost for rtx X. Return true if the complete
2408 cost has been computed, and false if subexpressions should be
2409 scanned. In either case, *TOTAL contains the cost result.
2410 CODE contains GET_CODE (x), OUTER_CODE contains the code
2411 of the superexpression of x. */
2414 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2415 int *total, bool speed ATTRIBUTE_UNUSED)
2438 *total = COSTS_N_INSNS (1);
2443 *total = COSTS_N_INSNS (1);
2447 switch (GET_MODE (x))
2451 rtx left = XEXP (x, 0);
2452 rtx right = XEXP (x, 1);
2453 if (GET_CODE (right) == CONST_INT
2454 && CONST_OK_FOR_K (INTVAL (right)))
2455 *total = s390_cost->mhi;
2456 else if (GET_CODE (left) == SIGN_EXTEND)
2457 *total = s390_cost->mh;
2459 *total = s390_cost->ms; /* msr, ms, msy */
2464 rtx left = XEXP (x, 0);
2465 rtx right = XEXP (x, 1);
2468 if (GET_CODE (right) == CONST_INT
2469 && CONST_OK_FOR_K (INTVAL (right)))
2470 *total = s390_cost->mghi;
2471 else if (GET_CODE (left) == SIGN_EXTEND)
2472 *total = s390_cost->msgf;
2474 *total = s390_cost->msg; /* msgr, msg */
2476 else /* TARGET_31BIT */
2478 if (GET_CODE (left) == SIGN_EXTEND
2479 && GET_CODE (right) == SIGN_EXTEND)
2480 /* mulsidi case: mr, m */
2481 *total = s390_cost->m;
2482 else if (GET_CODE (left) == ZERO_EXTEND
2483 && GET_CODE (right) == ZERO_EXTEND
2484 && TARGET_CPU_ZARCH)
2485 /* umulsidi case: ml, mlr */
2486 *total = s390_cost->ml;
2488 /* Complex calculation is required. */
2489 *total = COSTS_N_INSNS (40);
2495 *total = s390_cost->mult_df;
2498 *total = s390_cost->mxbr;
2506 switch (GET_MODE (x))
2509 *total = s390_cost->madbr;
2512 *total = s390_cost->maebr;
2517 /* A negation in the third argument is free: FMSUB. */
2518 if (GET_CODE (XEXP (x, 2)) == NEG)
2520 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2521 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2522 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2529 if (GET_MODE (x) == TImode) /* 128 bit division */
2530 *total = s390_cost->dlgr;
2531 else if (GET_MODE (x) == DImode)
2533 rtx right = XEXP (x, 1);
2534 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2535 *total = s390_cost->dlr;
2536 else /* 64 by 64 bit division */
2537 *total = s390_cost->dlgr;
2539 else if (GET_MODE (x) == SImode) /* 32 bit division */
2540 *total = s390_cost->dlr;
2545 if (GET_MODE (x) == DImode)
2547 rtx right = XEXP (x, 1);
2548 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2550 *total = s390_cost->dsgfr;
2552 *total = s390_cost->dr;
2553 else /* 64 by 64 bit division */
2554 *total = s390_cost->dsgr;
2556 else if (GET_MODE (x) == SImode) /* 32 bit division */
2557 *total = s390_cost->dlr;
2558 else if (GET_MODE (x) == SFmode)
2560 *total = s390_cost->debr;
2562 else if (GET_MODE (x) == DFmode)
2564 *total = s390_cost->ddbr;
2566 else if (GET_MODE (x) == TFmode)
2568 *total = s390_cost->dxbr;
2573 if (GET_MODE (x) == SFmode)
2574 *total = s390_cost->sqebr;
2575 else if (GET_MODE (x) == DFmode)
2576 *total = s390_cost->sqdbr;
2578 *total = s390_cost->sqxbr;
2583 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2584 || outer_code == PLUS || outer_code == MINUS
2585 || outer_code == COMPARE)
2590 *total = COSTS_N_INSNS (1);
2591 if (GET_CODE (XEXP (x, 0)) == AND
2592 && GET_CODE (XEXP (x, 1)) == CONST_INT
2593 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2595 rtx op0 = XEXP (XEXP (x, 0), 0);
2596 rtx op1 = XEXP (XEXP (x, 0), 1);
2597 rtx op2 = XEXP (x, 1);
2599 if (memory_operand (op0, GET_MODE (op0))
2600 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2602 if (register_operand (op0, GET_MODE (op0))
2603 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2613 /* Return the cost of an address rtx ADDR. */
2616 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2617 addr_space_t as ATTRIBUTE_UNUSED,
2618 bool speed ATTRIBUTE_UNUSED)
2620 struct s390_address ad;
2621 if (!s390_decompose_address (addr, &ad))
2624 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
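/* For example (illustrative): a base+index+displacement address such
   as 8(%r1,%r2) is charged one unit more than the plain
   base+displacement form 8(%r2), since addresses using an index
   register are slightly more expensive. */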
2627 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2628 otherwise return 0. */
2631 tls_symbolic_operand (rtx op)
2633 if (GET_CODE (op) != SYMBOL_REF)
2635 return SYMBOL_REF_TLS_MODEL (op);
2638 /* Split DImode access register reference REG (on 64-bit) into its constituent
2639 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2640 gen_highpart cannot be used as they assume all registers are word-sized,
2641 while our access registers have only half that size. */
2644 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2646 gcc_assert (TARGET_64BIT);
2647 gcc_assert (ACCESS_REG_P (reg));
2648 gcc_assert (GET_MODE (reg) == DImode);
2649 gcc_assert (!(REGNO (reg) & 1));
2651 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2652 *hi = gen_rtx_REG (SImode, REGNO (reg));
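/* Illustrative layout assumed by the code above: for a DImode
   access-register pair (a0,a1), the high SImode half lives in the
   even register a0 and the low half in the odd register a1
   (big-endian order). */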
2655 /* Return true if OP contains a symbol reference. */
2658 symbolic_reference_mentioned_p (rtx op)
2663 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2666 fmt = GET_RTX_FORMAT (GET_CODE (op));
2667 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2673 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2674 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2678 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2685 /* Return true if OP contains a reference to a thread-local symbol. */
2688 tls_symbolic_reference_mentioned_p (rtx op)
2693 if (GET_CODE (op) == SYMBOL_REF)
2694 return tls_symbolic_operand (op);
2696 fmt = GET_RTX_FORMAT (GET_CODE (op));
2697 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2703 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2704 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2708 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2716 /* Return true if OP is a legitimate general operand when
2717 generating PIC code. It is given that flag_pic is on
2718 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2721 legitimate_pic_operand_p (rtx op)
2723 /* Accept all non-symbolic constants. */
2724 if (!SYMBOLIC_CONST (op))
2727 /* Reject everything else; must be handled
2728 via emit_symbolic_move. */
2732 /* Returns true if the constant value OP is a legitimate general operand.
2733 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2736 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2738 /* Accept all non-symbolic constants. */
2739 if (!SYMBOLIC_CONST (op))
2742 /* Accept immediate LARL operands. */
2743 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2746 /* Thread-local symbols are never legal constants. This is
2747 so that emit_call knows that computing such addresses
2748 might require a function call. */
2749 if (TLS_SYMBOLIC_CONST (op))
2752 /* In the PIC case, symbolic constants must *not* be
2753 forced into the literal pool. We accept them here,
2754 so that they will be handled by emit_symbolic_move. */
2758 /* All remaining non-PIC symbolic constants are
2759 forced into the literal pool. */
2763 /* Determine if it's legal to put X into the constant pool. This
2764 is not possible if X contains the address of a symbol that is
2765 not constant (TLS) or not known at final link time (PIC). */
2768 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2770 switch (GET_CODE (x))
2774 /* Accept all non-symbolic constants. */
2778 /* Labels are OK iff we are non-PIC. */
2779 return flag_pic != 0;
2782 /* 'Naked' TLS symbol references are never OK,
2783 non-TLS symbols are OK iff we are non-PIC. */
2784 if (tls_symbolic_operand (x))
2787 return flag_pic != 0;
2790 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2793 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2794 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2797 switch (XINT (x, 1))
2799 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2800 case UNSPEC_LTREL_OFFSET:
2808 case UNSPEC_GOTNTPOFF:
2809 case UNSPEC_INDNTPOFF:
2812 /* If the literal pool shares the code section, execute template
2813 placeholders must be put into the pool as well. */
2815 return TARGET_CPU_ZARCH;
2827 /* Returns true if the constant value OP is a legitimate general
2828 operand during and after reload. The difference from
2829 legitimate_constant_p is that this function will not accept
2830 a constant that would need to be forced into the literal pool
2831 before it can be used as an operand.
2832 This function accepts all constants which can be loaded directly
2833 into general purpose registers. */
2836 legitimate_reload_constant_p (rtx op)
2838 /* Accept la(y) operands. */
2839 if (GET_CODE (op) == CONST_INT
2840 && DISP_IN_RANGE (INTVAL (op)))
2843 /* Accept l(g)hi/l(g)fi operands. */
2844 if (GET_CODE (op) == CONST_INT
2845 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2848 /* Accept lliXX operands. */
2849 if (TARGET_ZARCH
2850 && GET_CODE (op) == CONST_INT
2851 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2852 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2853 return true;
2855 if (TARGET_EXTIMM
2856 && GET_CODE (op) == CONST_INT
2857 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2858 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2859 return true;
2861 /* Accept larl operands. */
2862 if (TARGET_CPU_ZARCH
2863 && larl_operand (op, VOIDmode))
2866 /* Accept floating-point zero operands that fit into a single GPR. */
2867 if (GET_CODE (op) == CONST_DOUBLE
2868 && s390_float_const_zero_p (op)
2869 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2872 /* Accept double-word operands that can be split. */
2873 if (GET_CODE (op) == CONST_INT
2874 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2876 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2877 rtx hi = operand_subword (op, 0, 0, dword_mode);
2878 rtx lo = operand_subword (op, 1, 0, dword_mode);
2879 return legitimate_reload_constant_p (hi)
2880 && legitimate_reload_constant_p (lo);
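/* Worked example (illustrative): on a 31-bit target the DImode
   constant 0x100000001 does not fit into word_mode; it is split into
   the SImode halves 0x00000001 (hi) and 0x00000001 (lo), each of
   which is a valid lhi operand, so the whole constant can be
   reloaded without the literal pool. */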
2883 /* Everything else cannot be handled without reload. */
2887 /* Returns true if the constant value OP is a legitimate fp operand
2888 during and after reload.
2889 This function accepts all constants which can be loaded directly
2890 into an fpr. */
2893 legitimate_reload_fp_constant_p (rtx op)
2895 /* Accept floating-point zero operands if the load zero instruction
2896 can be used. Prior to z196 the load fp zero instruction caused a
2897 performance penalty if the result is used as BFP number. */
2898 if (TARGET_Z196
2899 && GET_CODE (op) == CONST_DOUBLE
2900 && s390_float_const_zero_p (op))
2906 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2907 return the class of reg to actually use. */
2910 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2912 switch (GET_CODE (op))
2914 /* Constants we cannot reload into general registers
2915 must be forced into the literal pool. */
2918 if (reg_class_subset_p (GENERAL_REGS, rclass)
2919 && legitimate_reload_constant_p (op))
2920 return GENERAL_REGS;
2921 else if (reg_class_subset_p (ADDR_REGS, rclass)
2922 && legitimate_reload_constant_p (op))
2924 else if (reg_class_subset_p (FP_REGS, rclass)
2925 && legitimate_reload_fp_constant_p (op))
2929 /* If a symbolic constant or a PLUS is reloaded,
2930 it is most likely being used as an address, so
2931 prefer ADDR_REGS. If 'class' is not a superset
2932 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2934 /* Symrefs cannot be pushed into the literal pool with -fPIC
2935 so we *MUST NOT* return NO_REGS for these cases
2936 (s390_cannot_force_const_mem will return true).
2938 On the other hand we MUST return NO_REGS for symrefs with an
2939 invalid addend which might have been pushed to the literal
2940 pool (no -fPIC). Usually we would expect them to be
2941 handled via secondary reload but this does not happen if
2942 they are used as literal pool slot replacement in reload
2943 inheritance (see emit_input_reload_insns). */
2944 if (TARGET_CPU_ZARCH
2945 && GET_CODE (XEXP (op, 0)) == PLUS
2946 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
2947 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
2949 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
2957 if (!legitimate_reload_constant_p (op))
2961 /* load address will be used. */
2962 if (reg_class_subset_p (ADDR_REGS, rclass))
2974 /* Return true if ADDR is SYMBOL_REF + addend, with the addend being a
2975 multiple of ALIGNMENT and the SYMBOL_REF being naturally
2976 aligned. */
2979 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
2981 HOST_WIDE_INT addend;
2984 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
2987 if (addend & (alignment - 1))
2990 if (GET_CODE (symref) == SYMBOL_REF
2991 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
2994 if (GET_CODE (symref) == UNSPEC
2995 && alignment <= UNITS_PER_LONG)
3001 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3002 operand, SCRATCH is used to reload the even part of the address and
3003 the odd part is added afterwards. */
3006 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3008 HOST_WIDE_INT addend;
3011 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3015 /* Easy case. The addend is even so larl will do fine. */
3016 emit_move_insn (reg, addr);
3019 /* We can leave the scratch register untouched if the target
3020 register is a valid base register. */
3021 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3022 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3025 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3026 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3029 emit_move_insn (scratch,
3030 gen_rtx_CONST (Pmode,
3031 gen_rtx_PLUS (Pmode, symref,
3032 GEN_INT (addend - 1))));
3034 emit_move_insn (scratch, symref);
3036 /* Increment the address using la in order to avoid clobbering cc. */
3037 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
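/* Example of the transformation performed above (illustrative):
   reloading the odd larl operand "sym + 5" becomes
       larl scratch,sym+4   ; even addend is a valid larl operand
       la   reg,1(scratch)  ; add the odd part without clobbering CC  */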
3041 /* Generate what is necessary to move between REG and MEM using
3042 SCRATCH. The direction is given by TOMEM. */
3045 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3047 /* Reload might have pulled a constant out of the literal pool.
3048 Force it back in. */
3049 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3050 || GET_CODE (mem) == CONST)
3051 mem = force_const_mem (GET_MODE (reg), mem);
3053 gcc_assert (MEM_P (mem));
3055 /* For a load from memory we can leave the scratch register
3056 untouched if the target register is a valid base register. */
3058 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3059 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3060 && GET_MODE (reg) == GET_MODE (scratch))
3063 /* Load address into scratch register. Since we can't have a
3064 secondary reload for a secondary reload we have to cover the case
3065 where larl would need a secondary reload here as well. */
3066 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3068 /* Now we can use a standard load/store to do the move. */
3070 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3072 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3075 /* Inform reload about cases where moving X with a mode MODE to a register in
3076 RCLASS requires an extra scratch or immediate register. Return the class
3077 needed for the immediate register. */
3080 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3081 enum machine_mode mode, secondary_reload_info *sri)
3083 enum reg_class rclass = (enum reg_class) rclass_i;
3085 /* Intermediate register needed. */
3086 if (reg_classes_intersect_p (CC_REGS, rclass))
3087 return GENERAL_REGS;
3091 HOST_WIDE_INT offset;
3094 /* On z10 several optimizer steps may generate larl operands with
3095 an odd addend. */
3096 if (TARGET_Z10
3097 && s390_loadrelative_operand_p (x, &symref, &offset)
3098 && mode == Pmode
3099 && !SYMBOL_REF_ALIGN1_P (symref)
3100 && (offset & 1) == 1)
3101 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3102 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3104 /* On z10 we need a scratch register when moving QI, TI or floating
3105 point mode values from or to a memory location with a SYMBOL_REF
3106 or if the symref addend of a SI or DI move is not aligned to the
3107 width of the access. */
3108 if (MEM_P (x)
3109 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3110 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3111 || (!TARGET_ZARCH && mode == DImode)
3112 || ((mode == HImode || mode == SImode || mode == DImode)
3113 && (!s390_check_symref_alignment (XEXP (x, 0),
3114 GET_MODE_SIZE (mode))))))
3116 #define __SECONDARY_RELOAD_CASE(M,m) \
3119 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3120 CODE_FOR_reload##m##di_tomem_z10; \
3122 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3123 CODE_FOR_reload##m##si_tomem_z10; \
3126 switch (GET_MODE (x))
3128 __SECONDARY_RELOAD_CASE (QI, qi);
3129 __SECONDARY_RELOAD_CASE (HI, hi);
3130 __SECONDARY_RELOAD_CASE (SI, si);
3131 __SECONDARY_RELOAD_CASE (DI, di);
3132 __SECONDARY_RELOAD_CASE (TI, ti);
3133 __SECONDARY_RELOAD_CASE (SF, sf);
3134 __SECONDARY_RELOAD_CASE (DF, df);
3135 __SECONDARY_RELOAD_CASE (TF, tf);
3136 __SECONDARY_RELOAD_CASE (SD, sd);
3137 __SECONDARY_RELOAD_CASE (DD, dd);
3138 __SECONDARY_RELOAD_CASE (TD, td);
3143 #undef __SECONDARY_RELOAD_CASE
3147 /* We need a scratch register when loading a PLUS expression which
3148 is not a legitimate operand of the LOAD ADDRESS instruction. */
3149 /* LRA can deal with transformation of plus op very well -- so we
3150 don't need to prompt LRA in this case. */
3151 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
3152 sri->icode = (TARGET_64BIT ?
3153 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3155 /* When performing a multiword move from or to memory we have to make
3156 sure the second chunk in memory is addressable without causing a
3157 displacement overflow. If that would be the case we calculate the
3158 address in a scratch register. */
3159 if (MEM_P (x)
3160 && GET_CODE (XEXP (x, 0)) == PLUS
3161 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3162 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3163 + GET_MODE_SIZE (mode) - 1))
3165 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
3166 in an s_operand address, since we may fall back to lm/stm. So we only
3167 have to care about overflows in the b+i+d case. */
3168 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3169 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3170 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3171 /* For FP_REGS no lm/stm is available so this check is triggered
3172 for displacement overflows in b+i+d and b+d like addresses. */
3173 || (reg_classes_intersect_p (FP_REGS, rclass)
3174 && s390_class_max_nregs (FP_REGS, mode) > 1))
3177 sri->icode = (TARGET_64BIT ?
3178 CODE_FOR_reloaddi_nonoffmem_in :
3179 CODE_FOR_reloadsi_nonoffmem_in);
3181 sri->icode = (TARGET_64BIT ?
3182 CODE_FOR_reloaddi_nonoffmem_out :
3183 CODE_FOR_reloadsi_nonoffmem_out);
3187 /* A scratch address register is needed when a symbolic constant is
3188 copied to r0 compiling with -fPIC. In other cases the target
3189 register might be used as temporary (see legitimize_pic_address). */
3190 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3191 sri->icode = (TARGET_64BIT ?
3192 CODE_FOR_reloaddi_PIC_addr :
3193 CODE_FOR_reloadsi_PIC_addr);
3195 /* Either scratch or no register needed. */
3199 /* Generate code to load SRC, which is a PLUS that is not a
3200 legitimate operand for the LA instruction, into TARGET.
3201 SCRATCH may be used as scratch register. */
3204 s390_expand_plus_operand (rtx target, rtx src,
3208 struct s390_address ad;
3210 /* src must be a PLUS; get its two operands. */
3211 gcc_assert (GET_CODE (src) == PLUS);
3212 gcc_assert (GET_MODE (src) == Pmode);
3214 /* Check if any of the two operands is already scheduled
3215 for replacement by reload. This can happen e.g. when
3216 float registers occur in an address. */
3217 sum1 = find_replacement (&XEXP (src, 0));
3218 sum2 = find_replacement (&XEXP (src, 1));
3219 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3221 /* If the address is already strictly valid, there's nothing to do. */
3222 if (!s390_decompose_address (src, &ad)
3223 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3224 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3226 /* Otherwise, one of the operands cannot be an address register;
3227 we reload its value into the scratch register. */
3228 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3230 emit_move_insn (scratch, sum1);
3233 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3235 emit_move_insn (scratch, sum2);
3239 /* According to the way these invalid addresses are generated
3240 in reload.c, it should never happen (at least on s390) that
3241 *neither* of the PLUS components, after find_replacements
3242 was applied, is an address register. */
3243 if (sum1 == scratch && sum2 == scratch)
3249 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3252 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3253 is only ever performed on addresses, so we can mark the
3254 sum as legitimate for LA in any case. */
3255 s390_load_address (target, src);
3259 /* Return true if ADDR is a valid memory address.
3260 STRICT specifies whether strict register checking applies. */
3263 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3265 struct s390_address ad;
3268 && larl_operand (addr, VOIDmode)
3269 && (mode == VOIDmode
3270 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3273 if (!s390_decompose_address (addr, &ad))
3278 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3281 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3287 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3288 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3292 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3293 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3299 /* Return true if OP is a valid operand for the LA instruction.
3300 In 31-bit, we need to prove that the result is used as an
3301 address, as LA performs only a 31-bit addition. */
3304 legitimate_la_operand_p (rtx op)
3306 struct s390_address addr;
3307 if (!s390_decompose_address (op, &addr))
3310 return (TARGET_64BIT || addr.pointer);
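/* Illustrative consequence: in 31-bit mode LA only computes a 31-bit
   result, so something like "la %r1,0(%r2)" is safe only when the
   value is used as an address; for plain 32-bit arithmetic the most
   significant bit would be lost. */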
3313 /* Return true if it is valid *and* preferable to use LA to
3314 compute the sum of OP1 and OP2. */
3317 preferred_la_operand_p (rtx op1, rtx op2)
3319 struct s390_address addr;
3321 if (op2 != const0_rtx)
3322 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3324 if (!s390_decompose_address (op1, &addr))
3326 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3328 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3331 /* Avoid LA instructions with index register on z196; it is
3332 preferable to use regular add instructions when possible.
3333 Starting with zEC12 the la with index register is "uncracked"
3334 again. */
3335 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3338 if (!TARGET_64BIT && !addr.pointer)
3344 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3345 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3351 /* Emit a forced load-address operation to load SRC into DST.
3352 This will use the LOAD ADDRESS instruction even in situations
3353 where legitimate_la_operand_p (SRC) returns false. */
3356 s390_load_address (rtx dst, rtx src)
3359 emit_move_insn (dst, src);
3361 emit_insn (gen_force_la_31 (dst, src));
3364 /* Return a legitimate reference for ORIG (an address) using the
3365 register REG. If REG is 0, a new pseudo is generated.
3367 There are two types of references that must be handled:
3369 1. Global data references must load the address from the GOT, via
3370 the PIC reg. An insn is emitted to do this load, and the reg is
3371 returned.
3373 2. Static data references, constant pool addresses, and code labels
3374 compute the address as an offset from the GOT, whose base is in
3375 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3376 differentiate them from global data objects. The returned
3377 address is the PIC reg + an unspec constant.
3379 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3380 reg also appears in the address. */
3383 legitimize_pic_address (rtx orig, rtx reg)
3386 rtx addend = const0_rtx;
3389 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3391 if (GET_CODE (addr) == CONST)
3392 addr = XEXP (addr, 0);
3394 if (GET_CODE (addr) == PLUS)
3396 addend = XEXP (addr, 1);
3397 addr = XEXP (addr, 0);
3400 if ((GET_CODE (addr) == LABEL_REF
3401 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3402 || (GET_CODE (addr) == UNSPEC &&
3403 (XINT (addr, 1) == UNSPEC_GOTENT
3404 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3405 && GET_CODE (addend) == CONST_INT)
3407 /* This can be locally addressed. */
3409 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3410 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3411 gen_rtx_CONST (Pmode, addr) : addr);
3413 if (TARGET_CPU_ZARCH
3414 && larl_operand (const_addr, VOIDmode)
3415 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3416 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3418 if (INTVAL (addend) & 1)
3420 /* LARL can't handle odd offsets, so emit a pair of LARL
3421 and LA. */
3422 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3424 if (!DISP_IN_RANGE (INTVAL (addend)))
3426 HOST_WIDE_INT even = INTVAL (addend) - 1;
3427 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3428 addr = gen_rtx_CONST (Pmode, addr);
3429 addend = const1_rtx;
3432 emit_move_insn (temp, addr);
3433 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3437 s390_load_address (reg, new_rtx);
3443 /* If the offset is even, we can just use LARL. This
3444 will happen automatically. */
3449 /* No larl - Access local symbols relative to the GOT. */
3451 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3453 if (reload_in_progress || reload_completed)
3454 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3456 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3457 if (addend != const0_rtx)
3458 addr = gen_rtx_PLUS (Pmode, addr, addend);
3459 addr = gen_rtx_CONST (Pmode, addr);
3460 addr = force_const_mem (Pmode, addr);
3461 emit_move_insn (temp, addr);
3463 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3466 s390_load_address (reg, new_rtx);
3471 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3473 /* A non-local symbol reference without addend.
3475 The symbol ref is wrapped into an UNSPEC to make sure the
3476 proper operand modifier (@GOT or @GOTENT) will be emitted.
3477 This will tell the linker to put the symbol into the GOT.
3479 Additionally the code dereferencing the GOT slot is emitted here.
3481 An addend to the symref needs to be added afterwards.
3482 legitimize_pic_address calls itself recursively to handle
3483 that case. So no need to do it here. */
3486 reg = gen_reg_rtx (Pmode);
3490 /* Use load relative if possible.
3491 lgrl <target>, sym@GOTENT */
3492 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3493 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3494 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3496 emit_move_insn (reg, new_rtx);
3499 else if (flag_pic == 1)
3501 /* Assume GOT offset is a valid displacement operand (< 4k
3502 or < 512k with z990). This is handled the same way in
3503 both 31- and 64-bit code (@GOT).
3504 lg <target>, sym@GOT(r12) */
3506 if (reload_in_progress || reload_completed)
3507 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3509 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3510 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3511 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3512 new_rtx = gen_const_mem (Pmode, new_rtx);
3513 emit_move_insn (reg, new_rtx);
3516 else if (TARGET_CPU_ZARCH)
3518 /* If the GOT offset might be >= 4k, we determine the position
3519 of the GOT entry via a PC-relative LARL (@GOTENT).
3520 larl temp, sym@GOTENT
3521 lg <target>, 0(temp) */
3523 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3525 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3526 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3528 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3529 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3530 emit_move_insn (temp, new_rtx);
3532 new_rtx = gen_const_mem (Pmode, temp);
3533 emit_move_insn (reg, new_rtx);
3539 /* If the GOT offset might be >= 4k, we have to load it
3540 from the literal pool (@GOT).
3542 lg temp, lit-litbase(r13)
3543 lg <target>, 0(temp)
3544 lit: .long sym@GOT */
3546 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3548 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3549 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3551 if (reload_in_progress || reload_completed)
3552 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3554 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3555 addr = gen_rtx_CONST (Pmode, addr);
3556 addr = force_const_mem (Pmode, addr);
3557 emit_move_insn (temp, addr);
3559 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3560 new_rtx = gen_const_mem (Pmode, new_rtx);
3561 emit_move_insn (reg, new_rtx);
3565 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3567 gcc_assert (XVECLEN (addr, 0) == 1);
3568 switch (XINT (addr, 1))
3570 /* These address symbols (or PLT slots) relative to the GOT
3571 (not GOT slots!). In general this will exceed the
3572 displacement range, so these values belong in the literal pool. */
3576 new_rtx = force_const_mem (Pmode, orig);
3579 /* For -fPIC the GOT size might exceed the displacement
3580 range so make sure the value is in the literal pool. */
3583 new_rtx = force_const_mem (Pmode, orig);
3586 /* For @GOTENT larl is used. This is handled like local
3587 symbol refs. */
3592 /* @PLT is OK as is on 64-bit, must be converted to
3593 GOT-relative @PLTOFF on 31-bit. */
3595 if (!TARGET_CPU_ZARCH)
3597 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3599 if (reload_in_progress || reload_completed)
3600 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3602 addr = XVECEXP (addr, 0, 0);
3603 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3605 if (addend != const0_rtx)
3606 addr = gen_rtx_PLUS (Pmode, addr, addend);
3607 addr = gen_rtx_CONST (Pmode, addr);
3608 addr = force_const_mem (Pmode, addr);
3609 emit_move_insn (temp, addr);
3611 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3614 s390_load_address (reg, new_rtx);
3619 /* On 64 bit larl can be used. This case is handled like
3620 local symbol refs. */
3624 /* Everything else cannot happen. */
3629 else if (addend != const0_rtx)
3631 /* Otherwise, compute the sum. */
3633 rtx base = legitimize_pic_address (addr, reg);
3634 new_rtx = legitimize_pic_address (addend,
3635 base == reg ? NULL_RTX : reg);
3636 if (GET_CODE (new_rtx) == CONST_INT)
3637 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3640 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3642 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3643 new_rtx = XEXP (new_rtx, 1);
3645 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3648 if (GET_CODE (new_rtx) == CONST)
3649 new_rtx = XEXP (new_rtx, 0);
3650 new_rtx = force_operand (new_rtx, 0);
3656 /* Load the thread pointer into a register. */
3659 s390_get_thread_pointer (void)
3661 rtx tp = gen_reg_rtx (Pmode);
3663 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3664 mark_reg_pointer (tp, BITS_PER_WORD);
3669 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
3670 in s390_tls_symbol which always refers to __tls_get_offset.
3671 The returned offset is written to RESULT_REG and a USE rtx is
3672 generated for TLS_CALL. */
3674 static GTY(()) rtx s390_tls_symbol;
3677 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3682 emit_insn (s390_load_got ());
3684 if (!s390_tls_symbol)
3685 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3687 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3688 gen_rtx_REG (Pmode, RETURN_REGNUM));
3690 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3691 RTL_CONST_CALL_P (insn) = 1;
3694 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3695 this (thread-local) address. REG may be used as temporary. */
3698 legitimize_tls_address (rtx addr, rtx reg)
3700 rtx new_rtx, tls_call, temp, base, r2, insn;
3702 if (GET_CODE (addr) == SYMBOL_REF)
3703 switch (tls_symbolic_operand (addr))
3705 case TLS_MODEL_GLOBAL_DYNAMIC:
3707 r2 = gen_rtx_REG (Pmode, 2);
3708 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3709 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3710 new_rtx = force_const_mem (Pmode, new_rtx);
3711 emit_move_insn (r2, new_rtx);
3712 s390_emit_tls_call_insn (r2, tls_call);
3713 insn = get_insns ();
3716 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3717 temp = gen_reg_rtx (Pmode);
3718 emit_libcall_block (insn, temp, r2, new_rtx);
3720 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3723 s390_load_address (reg, new_rtx);
3728 case TLS_MODEL_LOCAL_DYNAMIC:
3730 r2 = gen_rtx_REG (Pmode, 2);
3731 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3732 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3733 new_rtx = force_const_mem (Pmode, new_rtx);
3734 emit_move_insn (r2, new_rtx);
3735 s390_emit_tls_call_insn (r2, tls_call);
3736 insn = get_insns ();
3739 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3740 temp = gen_reg_rtx (Pmode);
3741 emit_libcall_block (insn, temp, r2, new_rtx);
3743 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3744 base = gen_reg_rtx (Pmode);
3745 s390_load_address (base, new_rtx);
3747 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3748 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3749 new_rtx = force_const_mem (Pmode, new_rtx);
3750 temp = gen_reg_rtx (Pmode);
3751 emit_move_insn (temp, new_rtx);
3753 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3756 s390_load_address (reg, new_rtx);
3761 case TLS_MODEL_INITIAL_EXEC:
3764 /* Assume GOT offset < 4k. This is handled the same way
3765 in both 31- and 64-bit code. */
3767 if (reload_in_progress || reload_completed)
3768 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3770 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3771 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3772 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3773 new_rtx = gen_const_mem (Pmode, new_rtx);
3774 temp = gen_reg_rtx (Pmode);
3775 emit_move_insn (temp, new_rtx);
3777 else if (TARGET_CPU_ZARCH)
3779 /* If the GOT offset might be >= 4k, we determine the position
3780 of the GOT entry via a PC-relative LARL. */
3782 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3783 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3784 temp = gen_reg_rtx (Pmode);
3785 emit_move_insn (temp, new_rtx);
3787 new_rtx = gen_const_mem (Pmode, temp);
3788 temp = gen_reg_rtx (Pmode);
3789 emit_move_insn (temp, new_rtx);
3793 /* If the GOT offset might be >= 4k, we have to load it
3794 from the literal pool. */
3796 if (reload_in_progress || reload_completed)
3797 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3799 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3800 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3801 new_rtx = force_const_mem (Pmode, new_rtx);
3802 temp = gen_reg_rtx (Pmode);
3803 emit_move_insn (temp, new_rtx);
3805 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3806 new_rtx = gen_const_mem (Pmode, new_rtx);
3808 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3809 temp = gen_reg_rtx (Pmode);
3810 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3814 /* In position-dependent code, load the absolute address of
3815 the GOT entry from the literal pool. */
3817 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3818 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3819 new_rtx = force_const_mem (Pmode, new_rtx);
3820 temp = gen_reg_rtx (Pmode);
3821 emit_move_insn (temp, new_rtx);
3824 new_rtx = gen_const_mem (Pmode, new_rtx);
3825 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3826 temp = gen_reg_rtx (Pmode);
3827 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3830 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3833 s390_load_address (reg, new_rtx);
3838 case TLS_MODEL_LOCAL_EXEC:
3839 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3840 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3841 new_rtx = force_const_mem (Pmode, new_rtx);
3842 temp = gen_reg_rtx (Pmode);
3843 emit_move_insn (temp, new_rtx);
3845 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3848 s390_load_address (reg, new_rtx);
3857 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3859 switch (XINT (XEXP (addr, 0), 1))
3861 case UNSPEC_INDNTPOFF:
3862 gcc_assert (TARGET_CPU_ZARCH);
3871 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3872 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3874 new_rtx = XEXP (XEXP (addr, 0), 0);
3875 if (GET_CODE (new_rtx) != SYMBOL_REF)
3876 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3878 new_rtx = legitimize_tls_address (new_rtx, reg);
3879 new_rtx = plus_constant (Pmode, new_rtx,
3880 INTVAL (XEXP (XEXP (addr, 0), 1)));
3881 new_rtx = force_operand (new_rtx, 0);
3885 gcc_unreachable (); /* for now ... */
3890 /* Emit insns making the address in operands[1] valid for a standard
3891 move to operands[0]. operands[1] is replaced by an address which
3892 should be used instead of the former RTX to emit the move
3893 pattern. */
3896 emit_symbolic_move (rtx *operands)
3898 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3900 if (GET_CODE (operands[0]) == MEM)
3901 operands[1] = force_reg (Pmode, operands[1]);
3902 else if (TLS_SYMBOLIC_CONST (operands[1]))
3903 operands[1] = legitimize_tls_address (operands[1], temp);
3905 operands[1] = legitimize_pic_address (operands[1], temp);
3908 /* Try machine-dependent ways of modifying an illegitimate address X
3909 to be legitimate. If we find one, return the new, valid address.
3911 OLDX is the address as it was before break_out_memory_refs was called.
3912 In some cases it is useful to look at this to decide what needs to be done.
3914 MODE is the mode of the operand pointed to by X.
3916 When -fpic is used, special handling is needed for symbolic references.
3917 See comments by legitimize_pic_address for details. */
3920 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3921 enum machine_mode mode ATTRIBUTE_UNUSED)
3923 rtx constant_term = const0_rtx;
3925 if (TLS_SYMBOLIC_CONST (x))
3927 x = legitimize_tls_address (x, 0);
3929 if (s390_legitimate_address_p (mode, x, FALSE))
3932 else if (GET_CODE (x) == PLUS
3933 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3934 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3940 if (SYMBOLIC_CONST (x)
3941 || (GET_CODE (x) == PLUS
3942 && (SYMBOLIC_CONST (XEXP (x, 0))
3943 || SYMBOLIC_CONST (XEXP (x, 1)))))
3944 x = legitimize_pic_address (x, 0);
3946 if (s390_legitimate_address_p (mode, x, FALSE))
3950 x = eliminate_constant_term (x, &constant_term);
3952 /* Optimize loading of large displacements by splitting them
3953 into the multiple of 4K and the rest; this allows the
3954 former to be CSE'd if possible.
3956 Don't do this if the displacement is added to a register
3957 pointing into the stack frame, as the offsets will
3958 change later anyway. */
3960 if (GET_CODE (constant_term) == CONST_INT
3961 && !TARGET_LONG_DISPLACEMENT
3962 && !DISP_IN_RANGE (INTVAL (constant_term))
3963 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
3965 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
3966 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
3968 rtx temp = gen_reg_rtx (Pmode);
3969 rtx val = force_operand (GEN_INT (upper), temp);
3971 emit_move_insn (temp, val);
3973 x = gen_rtx_PLUS (Pmode, x, temp);
3974 constant_term = GEN_INT (lower);
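/* Worked example (illustrative): a constant term of 0x12345 is split
   into lower = 0x345 and upper = 0x12000; the 0x12000 part goes into
   a new pseudo (and can be CSE'd across references), leaving a
   displacement that fits the short 12-bit field. */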
3977 if (GET_CODE (x) == PLUS)
3979 if (GET_CODE (XEXP (x, 0)) == REG)
3981 rtx temp = gen_reg_rtx (Pmode);
3982 rtx val = force_operand (XEXP (x, 1), temp);
3984 emit_move_insn (temp, val);
3986 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
3989 else if (GET_CODE (XEXP (x, 1)) == REG)
3991 rtx temp = gen_reg_rtx (Pmode);
3992 rtx val = force_operand (XEXP (x, 0), temp);
3994 emit_move_insn (temp, val);
3996 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4000 if (constant_term != const0_rtx)
4001 x = gen_rtx_PLUS (Pmode, x, constant_term);
4006 /* Try a machine-dependent way of reloading an illegitimate address AD
4007 operand. If we find one, push the reload and return the new address.
4009 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4010 and TYPE is the reload type of the current reload. */
4013 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4014 int opnum, int type)
4016 if (!optimize || TARGET_LONG_DISPLACEMENT)
4019 if (GET_CODE (ad) == PLUS)
4021 rtx tem = simplify_binary_operation (PLUS, Pmode,
4022 XEXP (ad, 0), XEXP (ad, 1));
4027 if (GET_CODE (ad) == PLUS
4028 && GET_CODE (XEXP (ad, 0)) == REG
4029 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4030 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4032 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4033 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4034 rtx cst, tem, new_rtx;
4036 cst = GEN_INT (upper);
4037 if (!legitimate_reload_constant_p (cst))
4038 cst = force_const_mem (Pmode, cst);
4040 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4041 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4043 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4044 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4045 opnum, (enum reload_type) type);
4052 /* Emit code to move LEN bytes from SRC to DST. */
4055 s390_expand_movmem (rtx dst, rtx src, rtx len)
4057 /* When tuning for z10 or higher we rely on the Glibc functions to
4058 do the right thing. Only for constant lengths below 64k will we
4059 generate inline code. */
4060 if (s390_tune >= PROCESSOR_2097_Z10
4061 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4064 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4066 if (INTVAL (len) > 0)
4067 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4070 else if (TARGET_MVCLE)
4072 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4077 rtx dst_addr, src_addr, count, blocks, temp;
4078 rtx_code_label *loop_start_label = gen_label_rtx ();
4079 rtx_code_label *loop_end_label = gen_label_rtx ();
4080 rtx_code_label *end_label = gen_label_rtx ();
4081 enum machine_mode mode;
4083 mode = GET_MODE (len);
4084 if (mode == VOIDmode)
4087 dst_addr = gen_reg_rtx (Pmode);
4088 src_addr = gen_reg_rtx (Pmode);
4089 count = gen_reg_rtx (mode);
4090 blocks = gen_reg_rtx (mode);
4092 convert_move (count, len, 1);
4093 emit_cmp_and_jump_insns (count, const0_rtx,
4094 EQ, NULL_RTX, mode, 1, end_label);
4096 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4097 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4098 dst = change_address (dst, VOIDmode, dst_addr);
4099 src = change_address (src, VOIDmode, src_addr);
4101 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4104 emit_move_insn (count, temp);
4106 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4109 emit_move_insn (blocks, temp);
4111 emit_cmp_and_jump_insns (blocks, const0_rtx,
4112 EQ, NULL_RTX, mode, 1, loop_end_label);
4114 emit_label (loop_start_label);
4116 if (TARGET_Z10
4117 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4121 /* Issue a read prefetch for the +3 cache line. */
4122 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4123 const0_rtx, const0_rtx);
4124 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4125 emit_insn (prefetch);
4127 /* Issue a write prefetch for the +3 cache line. */
4128 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4129 const1_rtx, const0_rtx);
4130 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4131 emit_insn (prefetch);
4134 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4135 s390_load_address (dst_addr,
4136 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4137 s390_load_address (src_addr,
4138 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4140 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4143 emit_move_insn (blocks, temp);
4145 emit_cmp_and_jump_insns (blocks, const0_rtx,
4146 EQ, NULL_RTX, mode, 1, loop_end_label);
4148 emit_jump (loop_start_label);
4149 emit_label (loop_end_label);
4151 emit_insn (gen_movmem_short (dst, src,
4152 convert_to_mode (Pmode, count, 1)));
4153 emit_label (end_label);
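/* Worked example of the loop above (illustrative): for len == 600 the
   block count is (600 - 1) >> 8 == 2, so the loop emits two 256-byte
   MVCs, and the trailing EXecuted MVC moves the remaining 88 bytes. */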
4158 /* Emit code to set LEN bytes at DST to VAL.
4159 Make use of clrmem if VAL is zero. */
4162 s390_expand_setmem (rtx dst, rtx len, rtx val)
4164 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4167 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4169 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4171 if (val == const0_rtx && INTVAL (len) <= 256)
4172 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4175 /* Initialize memory by storing the first byte. */
4176 emit_move_insn (adjust_address (dst, QImode, 0), val);
4178 if (INTVAL (len) > 1)
4180 /* Initiate 1 byte overlap move.
4181 The first byte of DST is propagated through DSTP1.
4182 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4183 DST is set to size 1 so the rest of the memory location
4184 does not count as source operand. */
4185 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4186 set_mem_size (dst, 1);
4188 emit_insn (gen_movmem_short (dstp1, dst,
4189 GEN_INT (INTVAL (len) - 2)));
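/* Worked example (illustrative): for a constant length of 5 the code
   stores VAL once at DST and then emits the overlapping move
       MVC 1(4,DST),0(DST)
   MVC copies left to right one byte at a time, so the byte just
   stored ripples through the remaining four bytes. */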
4194 else if (TARGET_MVCLE)
4196 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4197 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4202 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4203 rtx_code_label *loop_start_label = gen_label_rtx ();
4204 rtx_code_label *loop_end_label = gen_label_rtx ();
4205 rtx_code_label *end_label = gen_label_rtx ();
4206 enum machine_mode mode;
4208 mode = GET_MODE (len);
4209 if (mode == VOIDmode)
4212 dst_addr = gen_reg_rtx (Pmode);
4213 count = gen_reg_rtx (mode);
4214 blocks = gen_reg_rtx (mode);
4216 convert_move (count, len, 1);
4217 emit_cmp_and_jump_insns (count, const0_rtx,
4218 EQ, NULL_RTX, mode, 1, end_label);
4220 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4221 dst = change_address (dst, VOIDmode, dst_addr);
4223 if (val == const0_rtx)
4224 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4228 dstp1 = adjust_address (dst, VOIDmode, 1);
4229 set_mem_size (dst, 1);
4231 /* Initialize memory by storing the first byte. */
4232 emit_move_insn (adjust_address (dst, QImode, 0), val);
4234 /* If count is 1 we are done. */
4235 emit_cmp_and_jump_insns (count, const1_rtx,
4236 EQ, NULL_RTX, mode, 1, end_label);
4238 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4242 emit_move_insn (count, temp);
4244 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4247 emit_move_insn (blocks, temp);
4249 emit_cmp_and_jump_insns (blocks, const0_rtx,
4250 EQ, NULL_RTX, mode, 1, loop_end_label);
4252 emit_label (loop_start_label);
4254 if (TARGET_Z10
4255 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4257 /* Issue a write prefetch for the +4 cache line. */
4258 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4260 const1_rtx, const0_rtx);
4261 emit_insn (prefetch);
4262 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4265 if (val == const0_rtx)
4266 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4268 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4269 s390_load_address (dst_addr,
4270 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4272 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4275 emit_move_insn (blocks, temp);
4277 emit_cmp_and_jump_insns (blocks, const0_rtx,
4278 EQ, NULL_RTX, mode, 1, loop_end_label);
4280 emit_jump (loop_start_label);
4281 emit_label (loop_end_label);
4283 if (val == const0_rtx)
4284 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4286 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4287 emit_label (end_label);
4291 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4292 and return the result in TARGET. */
4295 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4297 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4300 /* When tuning for z10 or higher we rely on the Glibc functions to
4301 do the right thing. Only for constant lengths below 64k will we
4302 generate inline code. */
4303 if (s390_tune >= PROCESSOR_2097_Z10
4304 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4307 /* As the result of CMPINT is inverted compared to what we need,
4308 we have to swap the operands. */
4309 tmp = op0; op0 = op1; op1 = tmp;
4311 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4313 if (INTVAL (len) > 0)
4315 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4316 emit_insn (gen_cmpint (target, ccreg));
4319 emit_move_insn (target, const0_rtx);
4321 else if (TARGET_MVCLE)
4323 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4324 emit_insn (gen_cmpint (target, ccreg));
4328 rtx addr0, addr1, count, blocks, temp;
4329 rtx_code_label *loop_start_label = gen_label_rtx ();
4330 rtx_code_label *loop_end_label = gen_label_rtx ();
4331 rtx_code_label *end_label = gen_label_rtx ();
4332 enum machine_mode mode;
4334 mode = GET_MODE (len);
4335 if (mode == VOIDmode)
4338 addr0 = gen_reg_rtx (Pmode);
4339 addr1 = gen_reg_rtx (Pmode);
4340 count = gen_reg_rtx (mode);
4341 blocks = gen_reg_rtx (mode);
4343 convert_move (count, len, 1);
4344 emit_cmp_and_jump_insns (count, const0_rtx,
4345 EQ, NULL_RTX, mode, 1, end_label);
4347 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4348 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4349 op0 = change_address (op0, VOIDmode, addr0);
4350 op1 = change_address (op1, VOIDmode, addr1);
4352 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4355 emit_move_insn (count, temp);
4357 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4360 emit_move_insn (blocks, temp);
4362 emit_cmp_and_jump_insns (blocks, const0_rtx,
4363 EQ, NULL_RTX, mode, 1, loop_end_label);
4365 emit_label (loop_start_label);
4367 if (TARGET_Z10
4368 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4372 /* Issue a read prefetch for the +2 cache line of operand 1. */
4373 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4374 const0_rtx, const0_rtx);
4375 emit_insn (prefetch);
4376 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4378 /* Issue a read prefetch for the +2 cache line of operand 2. */
4379 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4380 const0_rtx, const0_rtx);
4381 emit_insn (prefetch);
4382 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4385 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4386 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4387 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4388 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4389 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4390 emit_jump_insn (temp);
4392 s390_load_address (addr0,
4393 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4394 s390_load_address (addr1,
4395 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4397 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4400 emit_move_insn (blocks, temp);
4402 emit_cmp_and_jump_insns (blocks, const0_rtx,
4403 EQ, NULL_RTX, mode, 1, loop_end_label);
4405 emit_jump (loop_start_label);
4406 emit_label (loop_end_label);
4408 emit_insn (gen_cmpmem_short (op0, op1,
4409 convert_to_mode (Pmode, count, 1)));
4410 emit_label (end_label);
4412 emit_insn (gen_cmpint (target, ccreg));
4418 /* Expand conditional increment or decrement using alc/slb instructions.
4419 Should generate code setting DST to either SRC or SRC + INCREMENT,
4420 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4421 Returns true if successful, false otherwise.
4423 That makes it possible to implement some if-constructs without jumps e.g.:
4424 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4425 unsigned int a, b, c;
4426 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4427 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4428 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4429 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4431 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4432 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4433 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4434 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4435 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
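/* As a sketch of the first transformation above (assuming a zero
   register %rz is available; the actual register choice is made by
   the patterns built below):
       clr  %rb,%ra    ; CCU compare, CC2 when b > a
       alcr %rc,%rz    ; c += carry, no branch needed  */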
4438 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4439 rtx dst, rtx src, rtx increment)
4441 enum machine_mode cmp_mode;
4442 enum machine_mode cc_mode;
4448 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4449 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4451 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4452 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4457 /* Try ADD LOGICAL WITH CARRY. */
4458 if (increment == const1_rtx)
4460 /* Determine CC mode to use. */
4461 if (cmp_code == EQ || cmp_code == NE)
4463 if (cmp_op1 != const0_rtx)
4465 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4466 NULL_RTX, 0, OPTAB_WIDEN);
4467 cmp_op1 = const0_rtx;
4470 cmp_code = cmp_code == EQ ? LEU : GTU;
4473 if (cmp_code == LTU || cmp_code == LEU)
4478 cmp_code = swap_condition (cmp_code);
4495 /* Emit comparison instruction pattern. */
4496 if (!register_operand (cmp_op0, cmp_mode))
4497 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4499 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4500 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4501 /* We use insn_invalid_p here to add clobbers if required. */
4502 ret = insn_invalid_p (emit_insn (insn), false);
4505 /* Emit ALC instruction pattern. */
4506 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4507 gen_rtx_REG (cc_mode, CC_REGNUM),
4510 if (src != const0_rtx)
4512 if (!register_operand (src, GET_MODE (dst)))
4513 src = force_reg (GET_MODE (dst), src);
4515 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4516 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4519 p = rtvec_alloc (2);
4521 gen_rtx_SET (VOIDmode, dst, op_res);
4523 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4524 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4529 /* Try SUBTRACT LOGICAL WITH BORROW. */
4530 if (increment == constm1_rtx)
4532 /* Determine CC mode to use. */
4533 if (cmp_code == EQ || cmp_code == NE)
4535 if (cmp_op1 != const0_rtx)
4537 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4538 NULL_RTX, 0, OPTAB_WIDEN);
4539 cmp_op1 = const0_rtx;
4542 cmp_code = cmp_code == EQ ? LEU : GTU;
4545 if (cmp_code == GTU || cmp_code == GEU)
4550 cmp_code = swap_condition (cmp_code);
4567 /* Emit comparison instruction pattern. */
4568 if (!register_operand (cmp_op0, cmp_mode))
4569 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4571 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4572 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4573 /* We use insn_invalid_p here to add clobbers if required. */
4574 ret = insn_invalid_p (emit_insn (insn), false);
4577 /* Emit SLB instruction pattern. */
4578 if (!register_operand (src, GET_MODE (dst)))
4579 src = force_reg (GET_MODE (dst), src);
4581 op_res = gen_rtx_MINUS (GET_MODE (dst),
4582 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4583 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4584 gen_rtx_REG (cc_mode, CC_REGNUM),
4586 p = rtvec_alloc (2);
4588 gen_rtx_SET (VOIDmode, dst, op_res);
4590 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4591 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4599 /* Expand code for the insv template. Return true if successful. */
4602 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4604 int bitsize = INTVAL (op1);
4605 int bitpos = INTVAL (op2);
4606 enum machine_mode mode = GET_MODE (dest);
4607 enum machine_mode smode;
4608 int smode_bsize, mode_bsize;
4611 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
4614 /* Generate INSERT IMMEDIATE (IILL et al). */
4615 /* (set (ze (reg)) (const_int)). */
4617 && register_operand (dest, word_mode)
4618 && (bitpos % 16) == 0
4619 && (bitsize % 16) == 0
4620 && const_int_operand (src, VOIDmode))
4622 HOST_WIDE_INT val = INTVAL (src);
4623 int regpos = bitpos + bitsize;
4625 while (regpos > bitpos)
4627 enum machine_mode putmode;
4630 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4635 putsize = GET_MODE_BITSIZE (putmode);
4637 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4640 gen_int_mode (val, putmode));
4643 gcc_assert (regpos == bitpos);
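/* Illustrative example for the loop above: with TARGET_EXTIMM, a
   32-bit wide insert aligned to a 32-bit boundary is emitted as a
   single SImode chunk (an IILF-style insert); without TARGET_EXTIMM
   the same insert is split into two HImode chunks (IILH/IILL-style
   inserts). */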
4647 smode = smallest_mode_for_size (bitsize, MODE_INT);
4648 smode_bsize = GET_MODE_BITSIZE (smode);
4649 mode_bsize = GET_MODE_BITSIZE (mode);
4651 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4653 && (bitsize % BITS_PER_UNIT) == 0
4655 && (register_operand (src, word_mode)
4656 || const_int_operand (src, VOIDmode)))
4658 /* Emit standard pattern if possible. */
4659 if (smode_bsize == bitsize)
4661 emit_move_insn (adjust_address (dest, smode, 0),
4662 gen_lowpart (smode, src));
4666 /* (set (ze (mem)) (const_int)). */
4667 else if (const_int_operand (src, VOIDmode))
4669 int size = bitsize / BITS_PER_UNIT;
4670 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4672 UNITS_PER_WORD - size);
4674 dest = adjust_address (dest, BLKmode, 0);
4675 set_mem_size (dest, size);
4676 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4680 /* (set (ze (mem)) (reg)). */
4681 else if (register_operand (src, word_mode))
4684 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4688 /* Emit st,stcmh sequence. */
4689 int stcmh_width = bitsize - 32;
4690 int size = stcmh_width / BITS_PER_UNIT;
4692 emit_move_insn (adjust_address (dest, SImode, size),
4693 gen_lowpart (SImode, src));
4694 set_mem_size (dest, size);
4695 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4696 GEN_INT (stcmh_width),
4698 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4704 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4705 if ((bitpos % BITS_PER_UNIT) == 0
4706 && (bitsize % BITS_PER_UNIT) == 0
4707 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4709 && (mode == DImode || mode == SImode)
4710 && register_operand (dest, mode))
4712 /* Emit a strict_low_part pattern if possible. */
4713 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4715 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4716 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4717 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4718 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4722 /* ??? There are more powerful versions of ICM that are not
4723 completely represented in the md file. */
4726 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4727 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4729 enum machine_mode mode_s = GET_MODE (src);
4731 if (mode_s == VOIDmode)
4733 /* Assume const_int etc already in the proper mode. */
4734 src = force_reg (mode, src);
4736 else if (mode_s != mode)
4738 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4739 src = force_reg (mode_s, src);
4740 src = gen_lowpart (mode, src);
4743 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
4744 op = gen_rtx_SET (VOIDmode, op, src);
4748 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4749 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4759 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4760 register that holds VAL of mode MODE shifted by COUNT bits. */
4763 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4765 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4766 NULL_RTX, 1, OPTAB_DIRECT);
4767 return expand_simple_binop (SImode, ASHIFT, val, count,
4768 NULL_RTX, 1, OPTAB_DIRECT);
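/* For instance (illustrative): a QImode VAL of 0x12 with COUNT == 16
yields an SImode register holding 0x00120000, ready to be IORed into
the containing aligned word. */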
4771 /* Structure to hold the initial parameters for a compare_and_swap operation
4772 in HImode and QImode. */
4774 struct alignment_context
4776 rtx memsi; /* SI aligned memory location. */
4777 rtx shift; /* Bit offset with regard to lsb. */
4778 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4779 rtx modemaski; /* ~modemask */
bool aligned; /* True if memory is aligned, false otherwise. */
4783 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
structure AC so the operation can be performed transparently on the
containing aligned word, if the memory alignment is known to be at
least 32 bits. MEM is the memory location for the actual operation
4786 and MODE its mode. */
4789 init_alignment_context (struct alignment_context *ac, rtx mem,
4790 enum machine_mode mode)
4792 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4793 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4796 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4799 /* Alignment is unknown. */
4800 rtx byteoffset, addr, align;
4802 /* Force the address into a register. */
4803 addr = force_reg (Pmode, XEXP (mem, 0));
4805 /* Align it to SImode. */
4806 align = expand_simple_binop (Pmode, AND, addr,
4807 GEN_INT (-GET_MODE_SIZE (SImode)),
4808 NULL_RTX, 1, OPTAB_DIRECT);
4810 ac->memsi = gen_rtx_MEM (SImode, align);
4811 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4812 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4813 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4815 /* Calculate shiftcount. */
4816 byteoffset = expand_simple_binop (Pmode, AND, addr,
4817 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4818 NULL_RTX, 1, OPTAB_DIRECT);
4819 /* As we already have some offset, evaluate the remaining distance. */
4820 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4821 NULL_RTX, 1, OPTAB_DIRECT);
/* Shift is the byte count, but we need the bit count. */
4825 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4826 NULL_RTX, 1, OPTAB_DIRECT);
4828 /* Calculate masks. */
4829 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4830 GEN_INT (GET_MODE_MASK (mode)),
4831 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4832 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
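/* Worked example (added for illustration): for an HImode MEM at byte
address 0x1001 with unknown alignment, ALIGN becomes 0x1000,
BYTEOFFSET is 1, and the shift is ((4 - 2) - 1) * 8 = 8 bits, so the
halfword occupies bits 8..23 of the big-endian SImode word and
MODEMASK is 0xffff << 8. */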
4836 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4837 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4838 perform the merge in SEQ2. */
4841 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4842 enum machine_mode mode, rtx val, rtx ins)
4849 tmp = copy_to_mode_reg (SImode, val);
4850 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4854 *seq2 = get_insns ();
4861 /* Failed to use insv. Generate a two part shift and mask. */
4863 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4864 *seq1 = get_insns ();
4868 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4869 *seq2 = get_insns ();
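/* Added note: in this fallback SEQ1 (the shift-and-mask of INS) can be
placed once outside the CS loop, while SEQ2 (the IOR into the masked
word) goes inside it, so each retry only repeats a single IOR. */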
4875 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4876 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4877 value to set if CMP == MEM. */
4880 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4881 rtx cmp, rtx new_rtx, bool is_weak)
4883 struct alignment_context ac;
4884 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4885 rtx res = gen_reg_rtx (SImode);
4886 rtx_code_label *csloop = NULL, *csend = NULL;
4888 gcc_assert (MEM_P (mem));
4890 init_alignment_context (&ac, mem, mode);
4892 /* Load full word. Subsequent loads are performed by CS. */
4893 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4894 NULL_RTX, 1, OPTAB_DIRECT);
4896 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4897 possible, we try to use insv to make this happen efficiently. If
4898 that fails we'll generate code both inside and outside the loop. */
4899 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4900 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4907 /* Start CS loop. */
4910 /* Begin assuming success. */
4911 emit_move_insn (btarget, const1_rtx);
4913 csloop = gen_label_rtx ();
4914 csend = gen_label_rtx ();
4915 emit_label (csloop);
4918 /* val = "<mem>00..0<mem>"
4919 * cmp = "00..0<cmp>00..0"
4920 * new = "00..0<new>00..0"
4926 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4928 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4933 /* Jump to end if we're done (likely?). */
4934 s390_emit_jump (csend, cc);
/* Check for changes outside the mode, and loop internally if so.
4937 Arrange the moves so that the compare is adjacent to the
4938 branch so that we can generate CRJ. */
4939 tmp = copy_to_reg (val);
4940 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4942 cc = s390_emit_compare (NE, val, tmp);
4943 s390_emit_jump (csloop, cc);
4946 emit_move_insn (btarget, const0_rtx);
4950 /* Return the correct part of the bitfield. */
4951 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4952 NULL_RTX, 1, OPTAB_DIRECT), 1);
4955 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4956 and VAL the value to play with. If AFTER is true then store the value
MEM holds after the operation; if AFTER is false then store the value MEM
4958 holds before the operation. If TARGET is zero then discard that value, else
4959 store it to TARGET. */
4962 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
4963 rtx target, rtx mem, rtx val, bool after)
4965 struct alignment_context ac;
4967 rtx new_rtx = gen_reg_rtx (SImode);
4968 rtx orig = gen_reg_rtx (SImode);
4969 rtx_code_label *csloop = gen_label_rtx ();
4971 gcc_assert (!target || register_operand (target, VOIDmode));
4972 gcc_assert (MEM_P (mem));
4974 init_alignment_context (&ac, mem, mode);
4976 /* Shift val to the correct bit positions.
4977 Preserve "icm", but prevent "ex icm". */
4978 if (!(ac.aligned && code == SET && MEM_P (val)))
4979 val = s390_expand_mask_and_shift (val, mode, ac.shift);
4981 /* Further preparation insns. */
4982 if (code == PLUS || code == MINUS)
4983 emit_move_insn (orig, val);
4984 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
4985 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
4986 NULL_RTX, 1, OPTAB_DIRECT);
4988 /* Load full word. Subsequent loads are performed by CS. */
4989 cmp = force_reg (SImode, ac.memsi);
4991 /* Start CS loop. */
4992 emit_label (csloop);
4993 emit_move_insn (new_rtx, cmp);
4995 /* Patch new with val at correct position. */
5000 val = expand_simple_binop (SImode, code, new_rtx, orig,
5001 NULL_RTX, 1, OPTAB_DIRECT);
5002 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5003 NULL_RTX, 1, OPTAB_DIRECT);
5006 if (ac.aligned && MEM_P (val))
5007 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5011 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5012 NULL_RTX, 1, OPTAB_DIRECT);
5013 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5014 NULL_RTX, 1, OPTAB_DIRECT);
5020 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5021 NULL_RTX, 1, OPTAB_DIRECT);
5023 case MULT: /* NAND */
5024 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5025 NULL_RTX, 1, OPTAB_DIRECT);
5026 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5027 NULL_RTX, 1, OPTAB_DIRECT);
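/* Added note: the earlier val ^= modemaski step set all bits outside
the field to 1, so (new & val) leaves those bits unchanged, and the
XOR with modemask then complements the field bits only -- a NAND
confined to the selected field. */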
5033 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5034 ac.memsi, cmp, new_rtx));
5036 /* Return the correct part of the bitfield. */
5038 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5039 after ? new_rtx : cmp, ac.shift,
5040 NULL_RTX, 1, OPTAB_DIRECT), 1);
5043 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5044 We need to emit DTP-relative relocations. */
5046 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5049 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5054 fputs ("\t.long\t", file);
5057 fputs ("\t.quad\t", file);
5062 output_addr_const (file, x);
5063 fputs ("@DTPOFF", file);
5066 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5067 /* Implement TARGET_MANGLE_TYPE. */
5070 s390_mangle_type (const_tree type)
5072 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5073 && TARGET_LONG_DOUBLE_128)
5076 /* For all other types, use normal C++ mangling. */
5081 /* In the name of slightly smaller debug output, and to cater to
5082 general assembler lossage, recognize various UNSPEC sequences
5083 and turn them back into a direct symbol reference. */
5086 s390_delegitimize_address (rtx orig_x)
5090 orig_x = delegitimize_mem_from_attrs (orig_x);
5093 /* Extract the symbol ref from:
5094 (plus:SI (reg:SI 12 %r12)
5095 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5096 UNSPEC_GOTOFF/PLTOFF)))
5098 (plus:SI (reg:SI 12 %r12)
5099 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5100 UNSPEC_GOTOFF/PLTOFF)
5101 (const_int 4 [0x4])))) */
5102 if (GET_CODE (x) == PLUS
5103 && REG_P (XEXP (x, 0))
5104 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5105 && GET_CODE (XEXP (x, 1)) == CONST)
5107 HOST_WIDE_INT offset = 0;
5109 /* The const operand. */
5110 y = XEXP (XEXP (x, 1), 0);
5112 if (GET_CODE (y) == PLUS
5113 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5115 offset = INTVAL (XEXP (y, 1));
5119 if (GET_CODE (y) == UNSPEC
5120 && (XINT (y, 1) == UNSPEC_GOTOFF
5121 || XINT (y, 1) == UNSPEC_PLTOFF))
5122 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5125 if (GET_CODE (x) != MEM)
5129 if (GET_CODE (x) == PLUS
5130 && GET_CODE (XEXP (x, 1)) == CONST
5131 && GET_CODE (XEXP (x, 0)) == REG
5132 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5134 y = XEXP (XEXP (x, 1), 0);
5135 if (GET_CODE (y) == UNSPEC
5136 && XINT (y, 1) == UNSPEC_GOT)
5137 y = XVECEXP (y, 0, 0);
5141 else if (GET_CODE (x) == CONST)
5143 /* Extract the symbol ref from:
5144 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5145 UNSPEC_PLT/GOTENT))) */
5148 if (GET_CODE (y) == UNSPEC
5149 && (XINT (y, 1) == UNSPEC_GOTENT
5150 || XINT (y, 1) == UNSPEC_PLT))
5151 y = XVECEXP (y, 0, 0);
5158 if (GET_MODE (orig_x) != Pmode)
5160 if (GET_MODE (orig_x) == BLKmode)
5162 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5169 /* Output operand OP to stdio stream FILE.
5170 OP is an address (register + offset) which is not used to address data;
5171 instead the rightmost bits are interpreted as the value. */
5174 print_shift_count_operand (FILE *file, rtx op)
5176 HOST_WIDE_INT offset;
5179 /* Extract base register and offset. */
5180 if (!s390_decompose_shift_count (op, &base, &offset))
5186 gcc_assert (GET_CODE (base) == REG);
5187 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5188 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
/* Offsets are restricted to twelve bits. */
5192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5194 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5197 /* Returns -1 if the function should not be made hotpatchable. Otherwise it
returns a number >= 0 that is the desired size of the hotpatch trampoline in halfwords.
5201 static int s390_function_num_hotpatch_trampoline_halfwords (tree decl,
5206 if (DECL_DECLARED_INLINE_P (decl)
5207 || DECL_ARTIFICIAL (decl)
5208 || MAIN_NAME_P (DECL_NAME (decl)))
5210 /* - Explicitly inlined functions cannot be hotpatched.
5211 - Artificial functions need not be hotpatched.
5212 - Making the main function hotpatchable is useless. */
5215 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
5216 if (attr || s390_hotpatch_trampoline_halfwords >= 0)
5218 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (decl)))
5221 warning (OPT_Wattributes, "function %qE with the %qs attribute"
5222 " is not hotpatchable", DECL_NAME (decl), "always_inline");
5228 get_hotpatch_attribute (attr) : s390_hotpatch_trampoline_halfwords;
5235 /* Hook to determine if one function can safely inline another. */
5238 s390_can_inline_p (tree caller, tree callee)
5240 if (s390_function_num_hotpatch_trampoline_halfwords (callee, false) >= 0)
5243 return default_target_can_inline_p (caller, callee);
5246 /* Write the extra assembler code needed to declare a function properly. */
5249 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
5252 int hotpatch_trampoline_halfwords = -1;
5256 hotpatch_trampoline_halfwords =
5257 s390_function_num_hotpatch_trampoline_halfwords (decl, true);
5258 if (hotpatch_trampoline_halfwords >= 0
5259 && decl_function_context (decl) != NULL_TREE)
5261 warning_at (DECL_SOURCE_LOCATION (decl), OPT_mhotpatch,
5262 "hotpatching is not compatible with nested functions");
5263 hotpatch_trampoline_halfwords = -1;
5267 if (hotpatch_trampoline_halfwords > 0)
5271 /* Add a trampoline code area before the function label and initialize it
5272 with two-byte nop instructions. This area can be overwritten with code
5273 that jumps to a patched version of the function. */
5274 for (i = 0; i < hotpatch_trampoline_halfwords; i++)
5275 asm_fprintf (asm_out_file, "\tnopr\t%%r7\n");
5276 /* Note: The function label must be aligned so that (a) the bytes of the
5277 following nop do not cross a cacheline boundary, and (b) a jump address
(eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
5279 stored directly before the label without crossing a cacheline
5280 boundary. All this is necessary to make sure the trampoline code can
5281 be changed atomically. */
5284 ASM_OUTPUT_LABEL (asm_out_file, fname);
5286 /* Output a four-byte nop if hotpatching is enabled. This can be overwritten
5287 atomically with a relative backwards jump to the trampoline area. */
5288 if (hotpatch_trampoline_halfwords >= 0)
5289 asm_fprintf (asm_out_file, "\tnop\t0\n");
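/* Illustrative output for two trampoline halfwords:

	nopr	%r7
	nopr	%r7
   fname:
	nop	0

   The nopr area can later be rewritten into a jump to a patched
   version of the function, and the nop into a backwards jump to that
   area. */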
5292 /* Output machine-dependent UNSPECs occurring in address constant X
5293 in assembler syntax to stdio stream FILE. Returns true if the
5294 constant X could be recognized, false otherwise. */
5297 s390_output_addr_const_extra (FILE *file, rtx x)
5299 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5300 switch (XINT (x, 1))
5303 output_addr_const (file, XVECEXP (x, 0, 0));
5304 fprintf (file, "@GOTENT");
5307 output_addr_const (file, XVECEXP (x, 0, 0));
5308 fprintf (file, "@GOT");
5311 output_addr_const (file, XVECEXP (x, 0, 0));
5312 fprintf (file, "@GOTOFF");
5315 output_addr_const (file, XVECEXP (x, 0, 0));
5316 fprintf (file, "@PLT");
5319 output_addr_const (file, XVECEXP (x, 0, 0));
5320 fprintf (file, "@PLTOFF");
5323 output_addr_const (file, XVECEXP (x, 0, 0));
5324 fprintf (file, "@TLSGD");
5327 assemble_name (file, get_some_local_dynamic_name ());
5328 fprintf (file, "@TLSLDM");
5331 output_addr_const (file, XVECEXP (x, 0, 0));
5332 fprintf (file, "@DTPOFF");
5335 output_addr_const (file, XVECEXP (x, 0, 0));
5336 fprintf (file, "@NTPOFF");
5338 case UNSPEC_GOTNTPOFF:
5339 output_addr_const (file, XVECEXP (x, 0, 0));
5340 fprintf (file, "@GOTNTPOFF");
5342 case UNSPEC_INDNTPOFF:
5343 output_addr_const (file, XVECEXP (x, 0, 0));
5344 fprintf (file, "@INDNTPOFF");
5348 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5349 switch (XINT (x, 1))
5351 case UNSPEC_POOL_OFFSET:
5352 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5353 output_addr_const (file, x);
5359 /* Output address operand ADDR in assembler syntax to
5360 stdio stream FILE. */
5363 print_operand_address (FILE *file, rtx addr)
5365 struct s390_address ad;
5367 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5371 output_operand_lossage ("symbolic memory references are "
5372 "only supported on z10 or later");
5375 output_addr_const (file, addr);
5379 if (!s390_decompose_address (addr, &ad)
5380 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5381 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5382 output_operand_lossage ("cannot decompose address");
5385 output_addr_const (file, ad.disp);
5387 fprintf (file, "0");
5389 if (ad.base && ad.indx)
5390 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5391 reg_names[REGNO (ad.base)]);
5393 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5396 /* Output operand X in assembler syntax to stdio stream FILE.
CODE specifies the format flag. The following format flags
5400 'C': print opcode suffix for branch condition.
5401 'D': print opcode suffix for inverse branch condition.
5402 'E': print opcode suffix for branch on index instruction.
5403 'G': print the size of the operand in bytes.
5404 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5405 'M': print the second word of a TImode operand.
5406 'N': print the second word of a DImode operand.
5407 'O': print only the displacement of a memory reference.
5408 'R': print only the base register of a memory reference.
5409 'S': print S-type memory reference (base+displacement).
5410 'Y': print shift count operand.
5412 'b': print integer X as if it's an unsigned byte.
'c': print integer X as if it's a signed byte.
5414 'e': "end" of DImode contiguous bitmask X.
5415 'f': "end" of SImode contiguous bitmask X.
5416 'h': print integer X as if it's a signed halfword.
5417 'i': print the first nonzero HImode part of X.
5418 'j': print the first HImode part unequal to -1 of X.
5419 'k': print the first nonzero SImode part of X.
5420 'm': print the first SImode part unequal to -1 of X.
'o': print integer X as if it's an unsigned 32-bit word.
5422 's': "start" of DImode contiguous bitmask X.
5423 't': "start" of SImode contiguous bitmask X.
5424 'x': print integer X as if it's an unsigned halfword.
5428 print_operand (FILE *file, rtx x, int code)
5435 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5439 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5443 if (GET_CODE (x) == LE)
5444 fprintf (file, "l");
5445 else if (GET_CODE (x) == GT)
5446 fprintf (file, "h");
5448 output_operand_lossage ("invalid comparison operator "
5449 "for 'E' output modifier");
5453 if (GET_CODE (x) == SYMBOL_REF)
5455 fprintf (file, "%s", ":tls_load:");
5456 output_addr_const (file, x);
5458 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5460 fprintf (file, "%s", ":tls_gdcall:");
5461 output_addr_const (file, XVECEXP (x, 0, 0));
5463 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5465 fprintf (file, "%s", ":tls_ldcall:");
5466 const char *name = get_some_local_dynamic_name ();
5468 assemble_name (file, name);
5471 output_operand_lossage ("invalid reference for 'J' output modifier");
5475 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5480 struct s390_address ad;
5485 output_operand_lossage ("memory reference expected for "
5486 "'O' output modifier");
5490 ret = s390_decompose_address (XEXP (x, 0), &ad);
5493 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5496 output_operand_lossage ("invalid address for 'O' output modifier");
5501 output_addr_const (file, ad.disp);
5503 fprintf (file, "0");
5509 struct s390_address ad;
5514 output_operand_lossage ("memory reference expected for "
5515 "'R' output modifier");
5519 ret = s390_decompose_address (XEXP (x, 0), &ad);
5522 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5525 output_operand_lossage ("invalid address for 'R' output modifier");
5530 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5532 fprintf (file, "0");
5538 struct s390_address ad;
5543 output_operand_lossage ("memory reference expected for "
5544 "'S' output modifier");
5547 ret = s390_decompose_address (XEXP (x, 0), &ad);
5550 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5553 output_operand_lossage ("invalid address for 'S' output modifier");
5558 output_addr_const (file, ad.disp);
5560 fprintf (file, "0");
5563 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5568 if (GET_CODE (x) == REG)
5569 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5570 else if (GET_CODE (x) == MEM)
5571 x = change_address (x, VOIDmode,
5572 plus_constant (Pmode, XEXP (x, 0), 4));
5574 output_operand_lossage ("register or memory expression expected "
5575 "for 'N' output modifier");
5579 if (GET_CODE (x) == REG)
5580 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5581 else if (GET_CODE (x) == MEM)
5582 x = change_address (x, VOIDmode,
5583 plus_constant (Pmode, XEXP (x, 0), 8));
5585 output_operand_lossage ("register or memory expression expected "
5586 "for 'M' output modifier");
5590 print_shift_count_operand (file, x);
5594 switch (GET_CODE (x))
5597 fprintf (file, "%s", reg_names[REGNO (x)]);
5601 output_address (XEXP (x, 0));
5608 output_addr_const (file, x);
5621 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5627 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5630 ival = s390_extract_part (x, HImode, 0);
5633 ival = s390_extract_part (x, HImode, -1);
5636 ival = s390_extract_part (x, SImode, 0);
5639 ival = s390_extract_part (x, SImode, -1);
5650 len = (code == 's' || code == 'e' ? 64 : 32);
5651 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5653 if (code == 's' || code == 't')
5654 ival = 64 - pos - len;
5656 ival = 64 - 1 - pos;
5660 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
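/* Example (illustrative): for code 's' and the DImode mask
0x00000000ffff0000 we get pos = 16, len = 16 and print
64 - 16 - 16 = 32, the start bit in the msb-0 numbering used by the
rotate-and-insert instructions. */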
5666 gcc_assert (GET_MODE (x) == VOIDmode);
5668 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5669 else if (code == 'x')
5670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5671 else if (code == 'h')
5672 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5673 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5677 output_operand_lossage ("invalid constant - try using "
5678 "an output modifier");
5680 output_operand_lossage ("invalid constant for output modifier '%c'",
5687 output_operand_lossage ("invalid expression - try using "
5688 "an output modifier");
5690 output_operand_lossage ("invalid expression for output "
5691 "modifier '%c'", code);
5696 /* Target hook for assembling integer objects. We need to define it
here to work around a bug in some versions of GAS, which couldn't
5698 handle values smaller than INT_MIN when printed in decimal. */
5701 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5703 if (size == 8 && aligned_p
5704 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5706 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5710 return default_assemble_integer (x, size, aligned_p);
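/* E.g. (illustrative): an INTVAL of 0x8000000000000000 is emitted as
"\t.quad\t0x8000000000000000" rather than in the decimal form the
buggy GAS versions mis-parsed. */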
5713 /* Returns true if register REGNO is used for forming
5714 a memory address in expression X. */
5717 reg_used_in_mem_p (int regno, rtx x)
5719 enum rtx_code code = GET_CODE (x);
5725 if (refers_to_regno_p (regno, regno+1,
5729 else if (code == SET
5730 && GET_CODE (SET_DEST (x)) == PC)
5732 if (refers_to_regno_p (regno, regno+1,
5737 fmt = GET_RTX_FORMAT (code);
5738 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5741 && reg_used_in_mem_p (regno, XEXP (x, i)))
5744 else if (fmt[i] == 'E')
5745 for (j = 0; j < XVECLEN (x, i); j++)
5746 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5752 /* Returns true if expression DEP_RTX sets an address register
5753 used by instruction INSN to address memory. */
5756 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5760 if (NONJUMP_INSN_P (dep_rtx))
5761 dep_rtx = PATTERN (dep_rtx);
5763 if (GET_CODE (dep_rtx) == SET)
5765 target = SET_DEST (dep_rtx);
5766 if (GET_CODE (target) == STRICT_LOW_PART)
5767 target = XEXP (target, 0);
5768 while (GET_CODE (target) == SUBREG)
5769 target = SUBREG_REG (target);
5771 if (GET_CODE (target) == REG)
5773 int regno = REGNO (target);
5775 if (s390_safe_attr_type (insn) == TYPE_LA)
5777 pat = PATTERN (insn);
5778 if (GET_CODE (pat) == PARALLEL)
5780 gcc_assert (XVECLEN (pat, 0) == 2);
5781 pat = XVECEXP (pat, 0, 0);
5783 gcc_assert (GET_CODE (pat) == SET);
5784 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5786 else if (get_attr_atype (insn) == ATYPE_AGEN)
5787 return reg_used_in_mem_p (regno, PATTERN (insn));
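/* Example of the dependency detected here (illustrative):

	lr	%r1,%r2
	l	%r3,0(%r1)

   The load uses %r1 during address generation, so the instruction
   setting %r1 feeds the agen unit of its successor. */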
/* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
5796 s390_agen_dep_p (rtx dep_insn, rtx insn)
5798 rtx dep_rtx = PATTERN (dep_insn);
5801 if (GET_CODE (dep_rtx) == SET
5802 && addr_generation_dependency_p (dep_rtx, insn))
5804 else if (GET_CODE (dep_rtx) == PARALLEL)
5806 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5808 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5816 /* A C statement (sans semicolon) to update the integer scheduling priority
INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
5818 reduce the priority to execute INSN later. Do not define this macro if
5819 you do not need to adjust the scheduling priorities of insns.
5821 A STD instruction should be scheduled earlier,
5822 in order to use the bypass. */
5824 s390_adjust_priority (rtx_insn *insn, int priority)
5826 if (! INSN_P (insn))
5829 if (s390_tune != PROCESSOR_2084_Z990
5830 && s390_tune != PROCESSOR_2094_Z9_109
5831 && s390_tune != PROCESSOR_2097_Z10
5832 && s390_tune != PROCESSOR_2817_Z196
5833 && s390_tune != PROCESSOR_2827_ZEC12)
5836 switch (s390_safe_attr_type (insn))
5840 priority = priority << 3;
5844 priority = priority << 1;
5853 /* The number of instructions that can be issued per cycle. */
5856 s390_issue_rate (void)
5860 case PROCESSOR_2084_Z990:
5861 case PROCESSOR_2094_Z9_109:
5862 case PROCESSOR_2817_Z196:
5864 case PROCESSOR_2097_Z10:
5865 case PROCESSOR_2827_ZEC12:
5873 s390_first_cycle_multipass_dfa_lookahead (void)
5878 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5879 Fix up MEMs as required. */
5882 annotate_constant_pool_refs (rtx *x)
5887 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5888 || !CONSTANT_POOL_ADDRESS_P (*x));
5890 /* Literal pool references can only occur inside a MEM ... */
5891 if (GET_CODE (*x) == MEM)
5893 rtx memref = XEXP (*x, 0);
5895 if (GET_CODE (memref) == SYMBOL_REF
5896 && CONSTANT_POOL_ADDRESS_P (memref))
5898 rtx base = cfun->machine->base_reg;
5899 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5902 *x = replace_equiv_address (*x, addr);
5906 if (GET_CODE (memref) == CONST
5907 && GET_CODE (XEXP (memref, 0)) == PLUS
5908 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5909 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5910 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5912 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5913 rtx sym = XEXP (XEXP (memref, 0), 0);
5914 rtx base = cfun->machine->base_reg;
5915 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5918 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5923 /* ... or a load-address type pattern. */
5924 if (GET_CODE (*x) == SET)
5926 rtx addrref = SET_SRC (*x);
5928 if (GET_CODE (addrref) == SYMBOL_REF
5929 && CONSTANT_POOL_ADDRESS_P (addrref))
5931 rtx base = cfun->machine->base_reg;
5932 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5935 SET_SRC (*x) = addr;
5939 if (GET_CODE (addrref) == CONST
5940 && GET_CODE (XEXP (addrref, 0)) == PLUS
5941 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5942 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5943 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5945 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5946 rtx sym = XEXP (XEXP (addrref, 0), 0);
5947 rtx base = cfun->machine->base_reg;
5948 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5951 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5956 /* Annotate LTREL_BASE as well. */
5957 if (GET_CODE (*x) == UNSPEC
5958 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5960 rtx base = cfun->machine->base_reg;
5961 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5966 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5967 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5971 annotate_constant_pool_refs (&XEXP (*x, i));
5973 else if (fmt[i] == 'E')
5975 for (j = 0; j < XVECLEN (*x, i); j++)
5976 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
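/* In RTL terms the annotation above turns (illustrative)
     (mem (symbol_ref ".LC0"))
   into
     (mem (unspec [(symbol_ref ".LC0") (reg base)] UNSPEC_LTREF))
   making the dependency on the literal pool base register explicit. */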
5981 /* Split all branches that exceed the maximum distance.
5982 Returns true if this created a new literal pool entry. */
5985 s390_split_branches (void)
5987 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
5988 int new_literal = 0, ret;
5993 /* We need correct insn addresses. */
5995 shorten_branches (get_insns ());
5997 /* Find all branches that exceed 64KB, and split them. */
5999 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6001 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
6004 pat = PATTERN (insn);
6005 if (GET_CODE (pat) == PARALLEL)
6006 pat = XVECEXP (pat, 0, 0);
6007 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
6010 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
6012 label = &SET_SRC (pat);
6014 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6016 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6017 label = &XEXP (SET_SRC (pat), 1);
6018 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6019 label = &XEXP (SET_SRC (pat), 2);
6026 if (get_attr_length (insn) <= 4)
/* We are going to use the return register as a scratch register,
so make sure it will be saved/restored by the prologue/epilogue. */
6031 cfun_frame_layout.save_return_addr_p = 1;
6036 rtx mem = force_const_mem (Pmode, *label);
6037 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn);
6038 INSN_ADDRESSES_NEW (set_insn, -1);
6039 annotate_constant_pool_refs (&PATTERN (set_insn));
6046 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6047 UNSPEC_LTREL_OFFSET);
6048 target = gen_rtx_CONST (Pmode, target);
6049 target = force_const_mem (Pmode, target);
6050 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6051 INSN_ADDRESSES_NEW (set_insn, -1);
6052 annotate_constant_pool_refs (&PATTERN (set_insn));
6054 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6055 cfun->machine->base_reg),
6057 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6060 ret = validate_change (insn, label, target, 0);
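/* Net effect (illustrative): an out-of-range branch now jumps through
TEMP_REG (%r14), which the insn emitted above loads with the target
address from the literal pool -- hence the save_return_addr_p marking
earlier in this function. */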
6068 /* Find an annotated literal pool symbol referenced in RTX X,
6069 and store it at REF. Will abort if X contains references to
6070 more than one such pool symbol; multiple references to the same
6071 symbol are allowed, however.
6073 The rtx pointed to by REF must be initialized to NULL_RTX
6074 by the caller before calling this routine. */
6077 find_constant_pool_ref (rtx x, rtx *ref)
6082 /* Ignore LTREL_BASE references. */
6083 if (GET_CODE (x) == UNSPEC
6084 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6086 /* Likewise POOL_ENTRY insns. */
6087 if (GET_CODE (x) == UNSPEC_VOLATILE
6088 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6091 gcc_assert (GET_CODE (x) != SYMBOL_REF
6092 || !CONSTANT_POOL_ADDRESS_P (x));
6094 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6096 rtx sym = XVECEXP (x, 0, 0);
6097 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6098 && CONSTANT_POOL_ADDRESS_P (sym));
6100 if (*ref == NULL_RTX)
6103 gcc_assert (*ref == sym);
6108 fmt = GET_RTX_FORMAT (GET_CODE (x));
6109 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6113 find_constant_pool_ref (XEXP (x, i), ref);
6115 else if (fmt[i] == 'E')
6117 for (j = 0; j < XVECLEN (x, i); j++)
6118 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6123 /* Replace every reference to the annotated literal pool
6124 symbol REF in X by its base plus OFFSET. */
6127 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6132 gcc_assert (*x != ref);
6134 if (GET_CODE (*x) == UNSPEC
6135 && XINT (*x, 1) == UNSPEC_LTREF
6136 && XVECEXP (*x, 0, 0) == ref)
6138 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6142 if (GET_CODE (*x) == PLUS
6143 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6144 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6145 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6146 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6148 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6149 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6153 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6154 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6158 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6160 else if (fmt[i] == 'E')
6162 for (j = 0; j < XVECLEN (*x, i); j++)
6163 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6168 /* Check whether X contains an UNSPEC_LTREL_BASE.
6169 Return its constant pool symbol if found, NULL_RTX otherwise. */
6172 find_ltrel_base (rtx x)
6177 if (GET_CODE (x) == UNSPEC
6178 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6179 return XVECEXP (x, 0, 0);
6181 fmt = GET_RTX_FORMAT (GET_CODE (x));
6182 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6186 rtx fnd = find_ltrel_base (XEXP (x, i));
6190 else if (fmt[i] == 'E')
6192 for (j = 0; j < XVECLEN (x, i); j++)
6194 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6204 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6207 replace_ltrel_base (rtx *x)
6212 if (GET_CODE (*x) == UNSPEC
6213 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6215 *x = XVECEXP (*x, 0, 1);
6219 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6220 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6224 replace_ltrel_base (&XEXP (*x, i));
6226 else if (fmt[i] == 'E')
6228 for (j = 0; j < XVECLEN (*x, i); j++)
6229 replace_ltrel_base (&XVECEXP (*x, i, j));
6235 /* We keep a list of constants which we have to add to internal
6236 constant tables in the middle of large functions. */
6238 #define NR_C_MODES 11
6239 enum machine_mode constant_modes[NR_C_MODES] =
6241 TFmode, TImode, TDmode,
6242 DFmode, DImode, DDmode,
6243 SFmode, SImode, SDmode,
6250 struct constant *next;
6252 rtx_code_label *label;
6255 struct constant_pool
6257 struct constant_pool *next;
6258 rtx_insn *first_insn;
6259 rtx_insn *pool_insn;
6261 rtx_insn *emit_pool_after;
6263 struct constant *constants[NR_C_MODES];
6264 struct constant *execute;
6265 rtx_code_label *label;
6269 /* Allocate new constant_pool structure. */
6271 static struct constant_pool *
6272 s390_alloc_pool (void)
6274 struct constant_pool *pool;
6277 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6279 for (i = 0; i < NR_C_MODES; i++)
6280 pool->constants[i] = NULL;
6282 pool->execute = NULL;
6283 pool->label = gen_label_rtx ();
6284 pool->first_insn = NULL;
6285 pool->pool_insn = NULL;
6286 pool->insns = BITMAP_ALLOC (NULL);
6288 pool->emit_pool_after = NULL;
6293 /* Create new constant pool covering instructions starting at INSN
6294 and chain it to the end of POOL_LIST. */
6296 static struct constant_pool *
6297 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
6299 struct constant_pool *pool, **prev;
6301 pool = s390_alloc_pool ();
6302 pool->first_insn = insn;
6304 for (prev = pool_list; *prev; prev = &(*prev)->next)
6311 /* End range of instructions covered by POOL at INSN and emit
6312 placeholder insn representing the pool. */
6315 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
6317 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6320 insn = get_last_insn ();
6322 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6323 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6326 /* Add INSN to the list of insns covered by POOL. */
6329 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6331 bitmap_set_bit (pool->insns, INSN_UID (insn));
6334 /* Return pool out of POOL_LIST that covers INSN. */
6336 static struct constant_pool *
6337 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6339 struct constant_pool *pool;
6341 for (pool = pool_list; pool; pool = pool->next)
6342 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6348 /* Add constant VAL of mode MODE to the constant pool POOL. */
6351 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6356 for (i = 0; i < NR_C_MODES; i++)
6357 if (constant_modes[i] == mode)
6359 gcc_assert (i != NR_C_MODES);
6361 for (c = pool->constants[i]; c != NULL; c = c->next)
6362 if (rtx_equal_p (val, c->value))
6367 c = (struct constant *) xmalloc (sizeof *c);
6369 c->label = gen_label_rtx ();
6370 c->next = pool->constants[i];
6371 pool->constants[i] = c;
6372 pool->size += GET_MODE_SIZE (mode);
6376 /* Return an rtx that represents the offset of X from the start of
6380 s390_pool_offset (struct constant_pool *pool, rtx x)
6384 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6385 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6386 UNSPEC_POOL_OFFSET);
6387 return gen_rtx_CONST (GET_MODE (x), x);
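/* The result has the form (illustrative)
     (const (unspec [(x) (label_ref pool_label)] UNSPEC_POOL_OFFSET))
   which is later printed as the difference between the two addresses;
   see the UNSPEC_POOL_OFFSET case in s390_output_addr_const_extra. */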
6390 /* Find constant VAL of mode MODE in the constant pool POOL.
6391 Return an RTX describing the distance from the start of
6392 the pool to the location of the new constant. */
6395 s390_find_constant (struct constant_pool *pool, rtx val,
6396 enum machine_mode mode)
6401 for (i = 0; i < NR_C_MODES; i++)
6402 if (constant_modes[i] == mode)
6404 gcc_assert (i != NR_C_MODES);
6406 for (c = pool->constants[i]; c != NULL; c = c->next)
6407 if (rtx_equal_p (val, c->value))
6412 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6415 /* Check whether INSN is an execute. Return the label_ref to its
6416 execute target template if so, NULL_RTX otherwise. */
6419 s390_execute_label (rtx insn)
6421 if (NONJUMP_INSN_P (insn)
6422 && GET_CODE (PATTERN (insn)) == PARALLEL
6423 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6424 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6425 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6430 /* Add execute target for INSN to the constant pool POOL. */
6433 s390_add_execute (struct constant_pool *pool, rtx insn)
6437 for (c = pool->execute; c != NULL; c = c->next)
6438 if (INSN_UID (insn) == INSN_UID (c->value))
6443 c = (struct constant *) xmalloc (sizeof *c);
6445 c->label = gen_label_rtx ();
6446 c->next = pool->execute;
6452 /* Find execute target for INSN in the constant pool POOL.
6453 Return an RTX describing the distance from the start of
6454 the pool to the location of the execute target. */
6457 s390_find_execute (struct constant_pool *pool, rtx insn)
6461 for (c = pool->execute; c != NULL; c = c->next)
6462 if (INSN_UID (insn) == INSN_UID (c->value))
6467 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6470 /* For an execute INSN, extract the execute target template. */
6473 s390_execute_target (rtx insn)
6475 rtx pattern = PATTERN (insn);
6476 gcc_assert (s390_execute_label (insn));
6478 if (XVECLEN (pattern, 0) == 2)
6480 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6484 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6487 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6488 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6490 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6496 /* Indicate that INSN cannot be duplicated. This is the case for
6497 execute insns that carry a unique label. */
6500 s390_cannot_copy_insn_p (rtx_insn *insn)
6502 rtx label = s390_execute_label (insn);
6503 return label && label != const0_rtx;
6506 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6507 do not emit the pool base label. */
6510 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6513 rtx_insn *insn = pool->pool_insn;
6516 /* Switch to rodata section. */
6517 if (TARGET_CPU_ZARCH)
6519 insn = emit_insn_after (gen_pool_section_start (), insn);
6520 INSN_ADDRESSES_NEW (insn, -1);
6523 /* Ensure minimum pool alignment. */
6524 if (TARGET_CPU_ZARCH)
6525 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6527 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6528 INSN_ADDRESSES_NEW (insn, -1);
6530 /* Emit pool base label. */
6533 insn = emit_label_after (pool->label, insn);
6534 INSN_ADDRESSES_NEW (insn, -1);
6537 /* Dump constants in descending alignment requirement order,
6538 ensuring proper alignment for every constant. */
6539 for (i = 0; i < NR_C_MODES; i++)
6540 for (c = pool->constants[i]; c; c = c->next)
6542 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6543 rtx value = copy_rtx (c->value);
6544 if (GET_CODE (value) == CONST
6545 && GET_CODE (XEXP (value, 0)) == UNSPEC
6546 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6547 && XVECLEN (XEXP (value, 0), 0) == 1)
6548 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6550 insn = emit_label_after (c->label, insn);
6551 INSN_ADDRESSES_NEW (insn, -1);
6553 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6554 gen_rtvec (1, value),
6555 UNSPECV_POOL_ENTRY);
6556 insn = emit_insn_after (value, insn);
6557 INSN_ADDRESSES_NEW (insn, -1);
6560 /* Ensure minimum alignment for instructions. */
6561 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6562 INSN_ADDRESSES_NEW (insn, -1);
6564 /* Output in-pool execute template insns. */
6565 for (c = pool->execute; c; c = c->next)
6567 insn = emit_label_after (c->label, insn);
6568 INSN_ADDRESSES_NEW (insn, -1);
6570 insn = emit_insn_after (s390_execute_target (c->value), insn);
6571 INSN_ADDRESSES_NEW (insn, -1);
6574 /* Switch back to previous section. */
6575 if (TARGET_CPU_ZARCH)
6577 insn = emit_insn_after (gen_pool_section_end (), insn);
6578 INSN_ADDRESSES_NEW (insn, -1);
6581 insn = emit_barrier_after (insn);
6582 INSN_ADDRESSES_NEW (insn, -1);
6584 /* Remove placeholder insn. */
6585 remove_insn (pool->pool_insn);
6588 /* Free all memory used by POOL. */
6591 s390_free_pool (struct constant_pool *pool)
6593 struct constant *c, *next;
6596 for (i = 0; i < NR_C_MODES; i++)
6597 for (c = pool->constants[i]; c; c = next)
6603 for (c = pool->execute; c; c = next)
6609 BITMAP_FREE (pool->insns);
6614 /* Collect main literal pool. Return NULL on overflow. */
6616 static struct constant_pool *
6617 s390_mainpool_start (void)
6619 struct constant_pool *pool;
6622 pool = s390_alloc_pool ();
6624 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6626 if (NONJUMP_INSN_P (insn)
6627 && GET_CODE (PATTERN (insn)) == SET
6628 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6629 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6631 /* There might be two main_pool instructions if base_reg
6632 is call-clobbered; one for shrink-wrapped code and one
6633 for the rest. We want to keep the first. */
6634 if (pool->pool_insn)
6636 insn = PREV_INSN (insn);
6637 delete_insn (NEXT_INSN (insn));
6640 pool->pool_insn = insn;
6643 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6645 s390_add_execute (pool, insn);
6647 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6649 rtx pool_ref = NULL_RTX;
6650 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6653 rtx constant = get_pool_constant (pool_ref);
6654 enum machine_mode mode = get_pool_mode (pool_ref);
6655 s390_add_constant (pool, constant, mode);
6659 /* If hot/cold partitioning is enabled we have to make sure that
6660 the literal pool is emitted in the same section where the
6661 initialization of the literal pool base pointer takes place.
emit_pool_after is only used in the non-overflow case on
non-zarch CPUs where we can emit the literal pool at the end of the
6664 function body within the text section. */
6666 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6667 && !pool->emit_pool_after)
6668 pool->emit_pool_after = PREV_INSN (insn);
6671 gcc_assert (pool->pool_insn || pool->size == 0);
6673 if (pool->size >= 4096)
6675 /* We're going to chunkify the pool, so remove the main
6676 pool placeholder insn. */
6677 remove_insn (pool->pool_insn);
6679 s390_free_pool (pool);
/* If the function ends with the section where the literal pool
6684 should be emitted set the marker to its end. */
6685 if (pool && !pool->emit_pool_after)
6686 pool->emit_pool_after = get_last_insn ();
6691 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6692 Modify the current function to output the pool constants as well as
6693 the pool register setup instruction. */
6696 s390_mainpool_finish (struct constant_pool *pool)
6698 rtx base_reg = cfun->machine->base_reg;
6700 /* If the pool is empty, we're done. */
6701 if (pool->size == 0)
6703 /* We don't actually need a base register after all. */
6704 cfun->machine->base_reg = NULL_RTX;
6706 if (pool->pool_insn)
6707 remove_insn (pool->pool_insn);
6708 s390_free_pool (pool);
6712 /* We need correct insn addresses. */
6713 shorten_branches (get_insns ());
6715 /* On zSeries, we use a LARL to load the pool register. The pool is
6716 located in the .rodata section, so we emit it after the function. */
6717 if (TARGET_CPU_ZARCH)
6719 rtx set = gen_main_base_64 (base_reg, pool->label);
6720 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
6721 INSN_ADDRESSES_NEW (insn, -1);
6722 remove_insn (pool->pool_insn);
6724 insn = get_last_insn ();
6725 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6726 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6728 s390_dump_pool (pool, 0);
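/* The base setup is thus a single instruction (illustrative; %rB is a
placeholder for the chosen base register):
	larl	%rB,.Lpool
   with the pool itself living in .rodata after the function. */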
6731 /* On S/390, if the total size of the function's code plus literal pool
6732 does not exceed 4096 bytes, we use BASR to set up a function base
6733 pointer, and emit the literal pool at the end of the function. */
6734 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6735 + pool->size + 8 /* alignment slop */ < 4096)
6737 rtx set = gen_main_base_31_small (base_reg, pool->label);
6738 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
6739 INSN_ADDRESSES_NEW (insn, -1);
6740 remove_insn (pool->pool_insn);
6742 insn = emit_label_after (pool->label, insn);
6743 INSN_ADDRESSES_NEW (insn, -1);
6745 /* emit_pool_after will be set by s390_mainpool_start to the
last insn of the section where the literal pool should be emitted. */
6748 insn = pool->emit_pool_after;
6750 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6751 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6753 s390_dump_pool (pool, 1);
6756 /* Otherwise, we emit an inline literal pool and use BASR to branch
6757 over it, setting up the pool register at the same time. */
6760 rtx_code_label *pool_end = gen_label_rtx ();
6762 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
6763 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
6764 JUMP_LABEL (insn) = pool_end;
6765 INSN_ADDRESSES_NEW (insn, -1);
6766 remove_insn (pool->pool_insn);
6768 insn = emit_label_after (pool->label, insn);
6769 INSN_ADDRESSES_NEW (insn, -1);
6771 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6772 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6774 insn = emit_label_after (pool_end, pool->pool_insn);
6775 INSN_ADDRESSES_NEW (insn, -1);
6777 s390_dump_pool (pool, 1);
6781 /* Replace all literal pool references. */
6783 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6786 replace_ltrel_base (&PATTERN (insn));
6788 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6790 rtx addr, pool_ref = NULL_RTX;
6791 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6794 if (s390_execute_label (insn))
6795 addr = s390_find_execute (pool, insn);
6797 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6798 get_pool_mode (pool_ref));
6800 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6801 INSN_CODE (insn) = -1;
6807 /* Free the pool. */
6808 s390_free_pool (pool);
6811 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6812 We have decided we cannot use this pool, so revert all changes
6813 to the current function that were done by s390_mainpool_start. */
6815 s390_mainpool_cancel (struct constant_pool *pool)
6817 /* We didn't actually change the instruction stream, so simply
6818 free the pool memory. */
6819 s390_free_pool (pool);
6823 /* Chunkify the literal pool. */
6825 #define S390_POOL_CHUNK_MIN 0xc00
6826 #define S390_POOL_CHUNK_MAX 0xe00
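/* Added note: both limits stay well below the 4096-byte range of a
12-bit displacement, leaving headroom for the base register reload
insns and alignment slop accounted for during chunkification. */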
6828 static struct constant_pool *
6829 s390_chunkify_start (void)
6831 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6834 rtx pending_ltrel = NULL_RTX;
6837 rtx (*gen_reload_base) (rtx, rtx) =
6838 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6841 /* We need correct insn addresses. */
6843 shorten_branches (get_insns ());
6845 /* Scan all insns and move literals to pool chunks. */
6847 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6849 bool section_switch_p = false;
6851 /* Check for pending LTREL_BASE. */
6854 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6857 gcc_assert (ltrel_base == pending_ltrel);
6858 pending_ltrel = NULL_RTX;
6862 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6865 curr_pool = s390_start_pool (&pool_list, insn);
6867 s390_add_execute (curr_pool, insn);
6868 s390_add_pool_insn (curr_pool, insn);
6870 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6872 rtx pool_ref = NULL_RTX;
6873 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6876 rtx constant = get_pool_constant (pool_ref);
6877 enum machine_mode mode = get_pool_mode (pool_ref);
6880 curr_pool = s390_start_pool (&pool_list, insn);
6882 s390_add_constant (curr_pool, constant, mode);
6883 s390_add_pool_insn (curr_pool, insn);
6885 /* Don't split the pool chunk between a LTREL_OFFSET load
6886 and the corresponding LTREL_BASE. */
6887 if (GET_CODE (constant) == CONST
6888 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6889 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6891 gcc_assert (!pending_ltrel);
6892 pending_ltrel = pool_ref;
6897 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
6900 s390_add_pool_insn (curr_pool, insn);
6901 /* An LTREL_BASE must follow within the same basic block. */
6902 gcc_assert (!pending_ltrel);
6906 switch (NOTE_KIND (insn))
6908 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6909 section_switch_p = true;
6911 case NOTE_INSN_VAR_LOCATION:
6912 case NOTE_INSN_CALL_ARG_LOCATION:
6919 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6920 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6923 if (TARGET_CPU_ZARCH)
6925 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6928 s390_end_pool (curr_pool, NULL);
6933 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6934 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6937 /* We will later have to insert base register reload insns.
6938 Those will have an effect on code size, which we need to
6939 consider here. This calculation makes rather pessimistic
6940 worst-case assumptions. */
6944 if (chunk_size < S390_POOL_CHUNK_MIN
6945 && curr_pool->size < S390_POOL_CHUNK_MIN
6946 && !section_switch_p)
6949 /* Pool chunks can only be inserted after BARRIERs ... */
6950 if (BARRIER_P (insn))
6952 s390_end_pool (curr_pool, insn);
6957 /* ... so if we don't find one in time, create one. */
6958 else if (chunk_size > S390_POOL_CHUNK_MAX
6959 || curr_pool->size > S390_POOL_CHUNK_MAX
6960 || section_switch_p)
6962 rtx_insn *label, *jump, *barrier, *next, *prev;
6964 if (!section_switch_p)
6966 /* We can insert the barrier only after a 'real' insn. */
6967 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
6969 if (get_attr_length (insn) == 0)
6971 /* Don't separate LTREL_BASE from the corresponding
6972 LTREL_OFFSET load. */
6979 next = NEXT_INSN (insn);
6983 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6984 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
6988 gcc_assert (!pending_ltrel);
6990 /* The old pool has to end before the section switch
note in order to make it part of the current section. */
6993 insn = PREV_INSN (insn);
6996 label = gen_label_rtx ();
6998 if (prev && NOTE_P (prev))
6999 prev = prev_nonnote_insn (prev);
7001 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
7002 INSN_LOCATION (prev));
7004 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
7005 barrier = emit_barrier_after (jump);
7006 insn = emit_label_after (label, barrier);
7007 JUMP_LABEL (jump) = label;
7008 LABEL_NUSES (label) = 1;
7010 INSN_ADDRESSES_NEW (jump, -1);
7011 INSN_ADDRESSES_NEW (barrier, -1);
7012 INSN_ADDRESSES_NEW (insn, -1);
7014 s390_end_pool (curr_pool, barrier);
7022 s390_end_pool (curr_pool, NULL);
7023 gcc_assert (!pending_ltrel);
7025 /* Find all labels that are branched into
7026 from an insn belonging to a different chunk. */
7028 far_labels = BITMAP_ALLOC (NULL);
7030 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7032 rtx_jump_table_data *table;
7034 /* Labels marked with LABEL_PRESERVE_P can be target
7035 of non-local jumps, so we have to mark them.
7036 The same holds for named labels.
Don't do that, however, if it is the label before a jump table. */
7042 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7044 rtx_insn *vec_insn = NEXT_INSN (insn);
7045 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7046 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7048 /* Check potential targets in a table jump (casesi_jump). */
7049 else if (tablejump_p (insn, NULL, &table))
7051 rtx vec_pat = PATTERN (table);
7052 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7054 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7056 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7058 if (s390_find_pool (pool_list, label)
7059 != s390_find_pool (pool_list, insn))
7060 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7063 /* If we have a direct jump (conditional or unconditional),
7064 check all potential targets. */
7065 else if (JUMP_P (insn))
7067 rtx pat = PATTERN (insn);
7069 if (GET_CODE (pat) == PARALLEL)
7070 pat = XVECEXP (pat, 0, 0);
7072 if (GET_CODE (pat) == SET)
7074 rtx label = JUMP_LABEL (insn);
7075 if (label && !ANY_RETURN_P (label))
7077 if (s390_find_pool (pool_list, label)
7078 != s390_find_pool (pool_list, insn))
7079 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7085 /* Insert base register reload insns before every pool. */
7087 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7089 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7091 rtx_insn *insn = curr_pool->first_insn;
7092 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7095 /* Insert base register reload insns at every far label. */
7097 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7099 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7101 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7104 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7106 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7111 BITMAP_FREE (far_labels);
7114 /* Recompute insn addresses. */
7116 init_insn_lengths ();
7117 shorten_branches (get_insns ());
7122 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7123 After we have decided to use this list, finish implementing
7124 all changes to the current function as required. */
7127 s390_chunkify_finish (struct constant_pool *pool_list)
7129 struct constant_pool *curr_pool = NULL;
7133 /* Replace all literal pool references. */
7135 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7138 replace_ltrel_base (&PATTERN (insn));
7140 curr_pool = s390_find_pool (pool_list, insn);
7144 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7146 rtx addr, pool_ref = NULL_RTX;
7147 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7148 if (pool_ref)
7150 if (s390_execute_label (insn))
7151 addr = s390_find_execute (curr_pool, insn);
7152 else
7153 addr = s390_find_constant (curr_pool,
7154 get_pool_constant (pool_ref),
7155 get_pool_mode (pool_ref));
7157 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7158 INSN_CODE (insn) = -1;
7163 /* Dump out all literal pools. */
7165 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7166 s390_dump_pool (curr_pool, 0);
7168 /* Free pool list. */
7170 while (pool_list)
7172 struct constant_pool *next = pool_list->next;
7173 s390_free_pool (pool_list);
7174 pool_list = next;
7178 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7179 We have decided we cannot use this list, so revert all changes
7180 to the current function that were done by s390_chunkify_start. */
7183 s390_chunkify_cancel (struct constant_pool *pool_list)
7185 struct constant_pool *curr_pool = NULL;
7188 /* Remove all pool placeholder insns. */
7190 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7192 /* Did we insert an extra barrier? Remove it. */
7193 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
7194 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
7195 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
7197 if (jump && JUMP_P (jump)
7198 && barrier && BARRIER_P (barrier)
7199 && label && LABEL_P (label)
7200 && GET_CODE (PATTERN (jump)) == SET
7201 && SET_DEST (PATTERN (jump)) == pc_rtx
7202 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7203 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7205 remove_insn (jump);
7206 remove_insn (barrier);
7207 remove_insn (label);
7210 remove_insn (curr_pool->pool_insn);
7213 /* Remove all base register reload insns. */
7215 for (insn = get_insns (); insn; )
7217 rtx_insn *next_insn = NEXT_INSN (insn);
7219 if (NONJUMP_INSN_P (insn)
7220 && GET_CODE (PATTERN (insn)) == SET
7221 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7222 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7223 remove_insn (insn);
7225 insn = next_insn;
7228 /* Free pool list. */
7230 while (pool_list)
7232 struct constant_pool *next = pool_list->next;
7233 s390_free_pool (pool_list);
7234 pool_list = next;
7238 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7241 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7243 REAL_VALUE_TYPE r;
7245 switch (GET_MODE_CLASS (mode))
7247 case MODE_FLOAT:
7248 case MODE_DECIMAL_FLOAT:
7249 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7251 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7252 assemble_real (r, mode, align);
7253 break;
7255 case MODE_INT:
7256 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7257 mark_symbol_refs_as_used (exp);
7258 break;
7260 default:
7261 gcc_unreachable ();
7266 /* Return an RTL expression representing the value of the return address
7267 for the frame COUNT steps up from the current frame. FRAME is the
7268 frame pointer of that frame. */
7271 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7276 /* Without backchain, we fail for all but the current frame. */
7278 if (!TARGET_BACKCHAIN && count > 0)
7279 return NULL_RTX;
7281 /* For the current frame, we need to make sure the initial
7282 value of RETURN_REGNUM is actually saved.  */
7284 if (count == 0)
7286 /* On non-z architectures branch splitting could overwrite r14.  */
7287 if (TARGET_CPU_ZARCH)
7288 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7290 else
7291 cfun_frame_layout.save_return_addr_p = true;
7292 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7296 if (TARGET_PACKED_STACK)
7297 offset = -2 * UNITS_PER_LONG;
7298 else
7299 offset = RETURN_REGNUM * UNITS_PER_LONG;
7301 addr = plus_constant (Pmode, frame, offset);
7302 addr = memory_address (Pmode, addr);
7303 return gen_rtx_MEM (Pmode, addr);
7306 /* Return an RTL expression representing the back chain stored in
7307 the current stack frame. */
7310 s390_back_chain_rtx (void)
7314 gcc_assert (TARGET_BACKCHAIN);
7316 if (TARGET_PACKED_STACK)
7317 chain = plus_constant (Pmode, stack_pointer_rtx,
7318 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7319 else
7320 chain = stack_pointer_rtx;
7322 chain = gen_rtx_MEM (Pmode, chain);
7323 return chain;
7326 /* Find first call clobbered register unused in a function.
7327 This could be used as base register in a leaf function
7328 or for holding the return address before epilogue. */
7331 find_unused_clobbered_reg (void)
7334 for (i = 0; i < 6; i++)
7335 if (!df_regs_ever_live_p (i))
7336 return i;
7337 return -1;
7341 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7342 clobbered hard regs in SETREG. */
7345 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7347 char *regs_ever_clobbered = (char *)data;
7348 unsigned int i, regno;
7349 enum machine_mode mode = GET_MODE (setreg);
7351 if (GET_CODE (setreg) == SUBREG)
7353 rtx inner = SUBREG_REG (setreg);
7354 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
7355 return;
7356 regno = subreg_regno (setreg);
7358 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
7359 regno = REGNO (setreg);
7360 else
7361 return;
7363 for (i = regno;
7364 i < regno + HARD_REGNO_NREGS (regno, mode);
7365 i++)
7366 regs_ever_clobbered[i] = 1;
7369 /* Walks through all basic blocks of the current function looking
7370 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7371 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7372 each of those regs. */
7375 s390_regs_ever_clobbered (char regs_ever_clobbered[])
7381 memset (regs_ever_clobbered, 0, 32);
7383 /* For non-leaf functions we have to consider all call clobbered regs to be
7384 clobbered.  */
7385 if (!crtl->is_leaf)
7387 for (i = 0; i < 32; i++)
7388 regs_ever_clobbered[i] = call_really_used_regs[i];
7391 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7392 this work is done by liveness analysis (mark_regs_live_at_end).
7393 Special care is needed for functions containing landing pads. Landing pads
7394 may use the eh registers, but the code which sets these registers is not
7395 contained in that function. Hence s390_regs_ever_clobbered is not able to
7396 deal with this automatically. */
7397 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7398 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7399 if (crtl->calls_eh_return
7400 || (cfun->machine->has_landing_pad_p
7401 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7402 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7404 /* For nonlocal gotos all call-saved registers have to be saved.
7405 This flag is also set for the unwinding code in libgcc.
7406 See expand_builtin_unwind_init.  For regs_ever_live this is done by
7407 reload.  */
7408 if (crtl->saves_all_registers)
7409 for (i = 0; i < 32; i++)
7410 if (!call_really_used_regs[i])
7411 regs_ever_clobbered[i] = 1;
7413 FOR_EACH_BB_FN (cur_bb, cfun)
7415 FOR_BB_INSNS (cur_bb, cur_insn)
7419 if (!INSN_P (cur_insn))
7422 pat = PATTERN (cur_insn);
7424 /* Ignore GPR restore insns. */
7425 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
7427 if (GET_CODE (pat) == SET
7428 && GENERAL_REG_P (SET_DEST (pat)))
7430 /* lgdr  */
7431 if (GET_MODE (SET_SRC (pat)) == DImode
7432 && FP_REG_P (SET_SRC (pat)))
7433 continue;
7435 /* l / lg  */
7436 if (GET_CODE (SET_SRC (pat)) == MEM)
7437 continue;
7440 /* lm / lmg */
7441 if (GET_CODE (pat) == PARALLEL
7442 && load_multiple_operation (pat, VOIDmode))
7443 continue;
7446 note_stores (pat,
7447 s390_reg_clobbered_rtx,
7448 regs_ever_clobbered);
7453 /* Determine the frame area which actually has to be accessed
7454 in the function epilogue. The values are stored at the
7455 given pointers AREA_BOTTOM (address of the lowest used stack
7456 address) and AREA_TOP (address of the first item which does
7457 not belong to the stack frame). */
7460 s390_frame_area (int *area_bottom, int *area_top)
7467 if (cfun_frame_layout.first_restore_gpr != -1)
7469 b = (cfun_frame_layout.gprs_offset
7470 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7471 t = b + (cfun_frame_layout.last_restore_gpr
7472 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7475 if (TARGET_64BIT && cfun_save_high_fprs_p)
7477 b = MIN (b, cfun_frame_layout.f8_offset);
7478 t = MAX (t, (cfun_frame_layout.f8_offset
7479 + cfun_frame_layout.high_fprs * 8));
7482 if (!TARGET_64BIT)
7484 if (cfun_fpr_save_p (FPR4_REGNUM))
7486 b = MIN (b, cfun_frame_layout.f4_offset);
7487 t = MAX (t, cfun_frame_layout.f4_offset + 8);
7489 if (cfun_fpr_save_p (FPR6_REGNUM))
7491 b = MIN (b, cfun_frame_layout.f4_offset + 8);
7492 t = MAX (t, cfun_frame_layout.f4_offset + 16);
7494 *area_bottom = b;
7495 *area_top = t;
7498 /* Update gpr_save_slots in the frame layout trying to make use of
7499 FPRs as GPR save slots.
7500 This is a helper routine of s390_register_info. */
7503 s390_register_info_gprtofpr ()
7505 int save_reg_slot = FPR0_REGNUM;
7508 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
7509 return;
7511 for (i = 15; i >= 6; i--)
7513 if (cfun_gpr_save_slot (i) == 0)
7516 /* Advance to the next FP register which can be used as a
7517 GPR save slot.  */
7518 while ((!call_really_used_regs[save_reg_slot]
7519 || df_regs_ever_live_p (save_reg_slot)
7520 || cfun_fpr_save_p (save_reg_slot))
7521 && FP_REGNO_P (save_reg_slot))
7522 save_reg_slot++;
7523 if (!FP_REGNO_P (save_reg_slot))
7525 /* We only want to use ldgr/lgdr if we can get rid of
7526 stm/lm entirely. So undo the gpr slot allocation in
7527 case we ran out of FPR save slots. */
7528 for (j = 6; j <= 15; j++)
7529 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
7530 cfun_gpr_save_slot (j) = -1;
7531 break;
7533 cfun_gpr_save_slot (i) = save_reg_slot++;
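/* Editorial illustration (not from the original sources): in a z10
   leaf function that must save r14 and r15, the loop above can map
   each of them onto the next call-clobbered, otherwise unused FPR,
   e.g. cfun_gpr_save_slot (15) = FPR0_REGNUM, so the prologue uses
   ldgr instead of an stm/stmg sequence.  The concrete FPR chosen
   depends on liveness and on the FPR save bitmap.  */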
7537 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
7538 stdarg.
7539 This is a helper routine for s390_register_info.  */
7542 s390_register_info_stdarg_fpr ()
7548 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
7549 f0-f4 for 64 bit. */
7550 if (!cfun->stdarg
7551 || !TARGET_HARD_FLOAT
7552 || !cfun->va_list_fpr_size
7553 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
7554 return;
7556 min_fpr = crtl->args.info.fprs;
7557 max_fpr = min_fpr + cfun->va_list_fpr_size;
7558 if (max_fpr > FP_ARG_NUM_REG)
7559 max_fpr = FP_ARG_NUM_REG;
7561 for (i = min_fpr; i < max_fpr; i++)
7562 cfun_set_fpr_save (i + FPR0_REGNUM);
7565 /* Reserve the GPR save slots for GPRs which need to be saved due to
7566 stdarg.
7567 This is a helper routine for s390_register_info.  */
7570 s390_register_info_stdarg_gpr ()
7576 if (!cfun->stdarg
7577 || !cfun->va_list_gpr_size
7578 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
7579 return;
7581 min_gpr = crtl->args.info.gprs;
7582 max_gpr = min_gpr + cfun->va_list_gpr_size;
7583 if (max_gpr > GP_ARG_NUM_REG)
7584 max_gpr = GP_ARG_NUM_REG;
7586 for (i = min_gpr; i < max_gpr; i++)
7587 cfun_gpr_save_slot (2 + i) = -1;
7590 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
7591 for registers which need to be saved in function prologue.
7592 This function can be used until the insns emitted for save/restore
7593 of the regs are visible in the RTL stream. */
7596 s390_register_info ()
7599 char clobbered_regs[32];
7601 gcc_assert (!epilogue_completed);
7603 if (reload_completed)
7604 /* After reload we rely on our own routine to determine which
7605 registers need saving. */
7606 s390_regs_ever_clobbered (clobbered_regs);
7607 else
7608 /* During reload we use regs_ever_live as a base since reload
7609 does changes in there which we otherwise would not be aware
7611 for (i = 0; i < 32; i++)
7612 clobbered_regs[i] = df_regs_ever_live_p (i);
7614 for (i = 0; i < 32; i++)
7615 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7617 /* Mark the call-saved FPRs which need to be saved.
7618 This needs to be done before checking the special GPRs since the
7619 stack pointer usage depends on whether high FPRs have to be saved
7620 or not.  */
7621 cfun_frame_layout.fpr_bitmap = 0;
7622 cfun_frame_layout.high_fprs = 0;
7623 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
7624 if (clobbered_regs[i] && !call_really_used_regs[i])
7626 cfun_set_fpr_save (i);
7627 if (i >= FPR8_REGNUM)
7628 cfun_frame_layout.high_fprs++;
7631 if (flag_pic)
7632 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7633 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7635 clobbered_regs[BASE_REGNUM]
7636 |= (cfun->machine->base_reg
7637 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7639 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
7640 |= !!frame_pointer_needed;
7642 /* On pre z900 machines this might take until machine dependent
7643 reorg to decide.
7644 save_return_addr_p will only be set on non-zarch machines so
7645 there is no risk that r14 goes into an FPR instead of a stack
7646 slot.  */
7647 clobbered_regs[RETURN_REGNUM]
7648 |= (!crtl->is_leaf
7649 || TARGET_TPF_PROFILING
7650 || cfun->machine->split_branches_pending_p
7651 || cfun_frame_layout.save_return_addr_p
7652 || crtl->calls_eh_return);
7654 clobbered_regs[STACK_POINTER_REGNUM]
7655 |= (!crtl->is_leaf
7656 || TARGET_TPF_PROFILING
7657 || cfun_save_high_fprs_p
7658 || get_frame_size () > 0
7659 || (reload_completed && cfun_frame_layout.frame_size > 0)
7660 || cfun->calls_alloca);
7662 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
7664 for (i = 6; i < 16; i++)
7665 if (clobbered_regs[i])
7666 cfun_gpr_save_slot (i) = -1;
7668 s390_register_info_stdarg_fpr ();
7669 s390_register_info_gprtofpr ();
7671 /* First find the range of GPRs to be restored. Vararg regs don't
7672 need to be restored so we do it before assigning slots to the
7673 vararg GPRs.  */
7674 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7675 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7676 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7677 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7679 /* stdarg functions might need to save GPRs 2 to 6. This might
7680 override the GPR->FPR save decision made above for r6 since
7681 vararg regs must go to the stack. */
7682 s390_register_info_stdarg_gpr ();
7684 /* Now the range of GPRs which need saving. */
7685 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7686 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7687 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7688 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
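/* Editorial worked example: if the slot assignment above left r13 and
   r15 with stack slots (-1) but r14 without one, the two scans yield
   first_save_gpr == 13 and last_save_gpr == 15; the save range is
   deliberately contiguous, so r14 is stored as well.  */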
7691 /* This function is called by s390_optimize_prologue in order to get
7692 rid of unnecessary GPR save/restore instructions. The register info
7693 for the GPRs is re-computed and the ranges are re-calculated. */
7696 s390_optimize_register_info ()
7698 char clobbered_regs[32];
7701 gcc_assert (epilogue_completed);
7702 gcc_assert (!cfun->machine->split_branches_pending_p);
7704 s390_regs_ever_clobbered (clobbered_regs);
7706 for (i = 0; i < 32; i++)
7707 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7709 /* There is still special treatment needed for cases invisible to
7710 s390_regs_ever_clobbered. */
7711 clobbered_regs[RETURN_REGNUM]
7712 |= (TARGET_TPF_PROFILING
7713 /* When expanding builtin_return_addr in ESA mode we do not
7714 know whether r14 will later be needed as scratch reg when
7715 doing branch splitting. So the builtin always accesses the
7716 r14 save slot and we need to stick to the save/restore
7717 decision for r14 even if it turns out that it didn't get
7718 used.  */
7719 || cfun_frame_layout.save_return_addr_p
7720 || crtl->calls_eh_return);
7722 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
7724 for (i = 6; i < 16; i++)
7725 if (!clobbered_regs[i])
7726 cfun_gpr_save_slot (i) = 0;
7728 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7729 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7730 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7731 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7733 s390_register_info_stdarg_gpr ();
7735 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7736 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7737 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7738 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7741 /* Fill cfun->machine with info about frame of current function. */
7744 s390_frame_info (void)
7746 HOST_WIDE_INT lowest_offset;
7748 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
7749 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
7751 /* The va_arg builtin uses a constant distance of 16 *
7752 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
7753 pointer. So even if we are going to save the stack pointer in an
7754 FPR we need the stack space in order to keep the offsets
7755 valid.  */
7756 if (cfun->stdarg && cfun_save_arg_fprs_p)
7758 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7760 if (cfun_frame_layout.first_save_gpr_slot == -1)
7761 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
7764 cfun_frame_layout.frame_size = get_frame_size ();
7765 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7766 fatal_error ("total size of local variables exceeds architecture limit");
7768 if (!TARGET_PACKED_STACK)
7770 /* Fixed stack layout. */
7771 cfun_frame_layout.backchain_offset = 0;
7772 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7773 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7774 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7775 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7776 * UNITS_PER_LONG);
7778 else if (TARGET_BACKCHAIN)
7780 /* Kernel stack layout - packed stack, backchain, no float */
7781 gcc_assert (TARGET_SOFT_FLOAT);
7782 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7783 - UNITS_PER_LONG);
7785 /* The distance between the backchain and the return address
7786 save slot must not change. So we always need a slot for the
7787 stack pointer which resides in between. */
7788 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7790 cfun_frame_layout.gprs_offset
7791 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
7793 /* FPRs will not be saved. Nevertheless pick sane values to
7794 keep area calculations valid. */
7795 cfun_frame_layout.f0_offset =
7796 cfun_frame_layout.f4_offset =
7797 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
7803 /* Packed stack layout without backchain. */
7805 /* With stdarg FPRs need their dedicated slots. */
7806 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
7807 : (cfun_fpr_save_p (FPR4_REGNUM) +
7808 cfun_fpr_save_p (FPR6_REGNUM)));
7809 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
7811 num_fprs = (cfun->stdarg ? 2
7812 : (cfun_fpr_save_p (FPR0_REGNUM)
7813 + cfun_fpr_save_p (FPR2_REGNUM)));
7814 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
7816 cfun_frame_layout.gprs_offset
7817 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7819 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
7820 - cfun_frame_layout.high_fprs * 8);
7823 if (cfun_save_high_fprs_p)
7824 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7826 if (!crtl->is_leaf)
7827 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7829 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
7830 sized area at the bottom of the stack. This is required also for
7831 leaf functions. When GCC generates a local stack reference it
7832 will always add STACK_POINTER_OFFSET to all these references.  */
7833 if (crtl->is_leaf
7834 && !TARGET_TPF_PROFILING
7835 && cfun_frame_layout.frame_size == 0
7836 && !cfun->calls_alloca)
7837 return;
7839 /* Calculate the number of bytes we have used in our own register
7840 save area. With the packed stack layout we can re-use the
7841 remaining bytes for normal stack elements. */
7843 if (TARGET_PACKED_STACK)
7844 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
7845 cfun_frame_layout.f4_offset),
7846 cfun_frame_layout.gprs_offset);
7847 else
7848 lowest_offset = 0;
7850 if (TARGET_BACKCHAIN)
7851 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
7853 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
7855 /* If under 31 bit an odd number of gprs has to be saved we have to
7856 adjust the frame size to sustain 8 byte alignment of stack
7857 frames.  */
7858 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7859 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7860 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
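/* Editorial arithmetic for the rounding above: STACK_BOUNDARY is 64
   bits, i.e. 8 bytes, so a 31-bit frame_size of 52 bytes becomes
   (52 + 7) & ~7 == 56, restoring 8-byte stack alignment after an odd
   number of 4-byte GPR slots.  */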
7863 /* Generate frame layout. Fills in register and frame data for the current
7864 function in cfun->machine. This routine can be called multiple times;
7865 it will re-do the complete frame layout every time. */
7868 s390_init_frame_layout (void)
7870 HOST_WIDE_INT frame_size;
7871 bool base_used;
7873 gcc_assert (!reload_completed);
7875 /* On S/390 machines, we may need to perform branch splitting, which
7876 will require both base and return address register. We have no
7877 choice but to assume we're going to need them until right at the
7878 end of the machine dependent reorg phase. */
7879 if (!TARGET_CPU_ZARCH)
7880 cfun->machine->split_branches_pending_p = true;
7882 do
7884 frame_size = cfun_frame_layout.frame_size;
7886 /* Try to predict whether we'll need the base register. */
7887 base_used = cfun->machine->split_branches_pending_p
7888 || crtl->uses_const_pool
7889 || (!DISP_IN_RANGE (frame_size)
7890 && !CONST_OK_FOR_K (frame_size));
7892 /* Decide which register to use as literal pool base. In small
7893 leaf functions, try to use an unused call-clobbered register
7894 as base register to avoid save/restore overhead. */
7895 if (!base_used)
7896 cfun->machine->base_reg = NULL_RTX;
7897 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7898 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7900 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7902 s390_register_info ();
7903 s390_frame_info ();
7905 while (frame_size != cfun_frame_layout.frame_size);
7908 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
7909 the TX is nonescaping. A transaction is considered escaping if
7910 there is at least one path from tbegin returning CC0 to the
7911 function exit block without a tend.
7913 The check so far has some limitations:
7914 - only single tbegin/tend BBs are supported
7915 - the first cond jump after tbegin must separate the CC0 path from ~CC0
7916 - when CC is copied to a GPR and the CC0 check is done with the GPR
7917 this is not supported
7921 s390_optimize_nonescaping_tx (void)
7923 const unsigned int CC0 = 1 << 3;
7924 basic_block tbegin_bb = NULL;
7925 basic_block tend_bb = NULL;
7930 rtx_insn *tbegin_insn = NULL;
7932 if (!cfun->machine->tbegin_p)
7935 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
7937 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
7942 FOR_BB_INSNS (bb, insn)
7944 rtx ite, cc, pat, target;
7945 unsigned HOST_WIDE_INT mask;
7947 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
7950 pat = PATTERN (insn);
7952 if (GET_CODE (pat) == PARALLEL)
7953 pat = XVECEXP (pat, 0, 0);
7955 if (GET_CODE (pat) != SET
7956 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
7957 continue;
7959 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
7961 tbegin_insn = insn;
7965 /* Just return if the tbegin doesn't have clobbers. */
7966 if (GET_CODE (PATTERN (insn)) != PARALLEL)
7967 return;
7969 if (tbegin_bb != NULL)
7970 return;
7972 /* Find the next conditional jump. */
7973 for (tmp = NEXT_INSN (insn);
7974 tmp != NULL_RTX;
7975 tmp = NEXT_INSN (tmp))
7977 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
7978 return;
7980 if (!JUMP_P (tmp))
7981 continue;
7982 ite = SET_SRC (PATTERN (tmp));
7983 if (GET_CODE (ite) != IF_THEN_ELSE)
7984 return;
7986 cc = XEXP (XEXP (ite, 0), 0);
7987 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
7988 || GET_MODE (cc) != CCRAWmode
7989 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
7990 return;
7992 if (bb->succs->length () != 2)
7993 return;
7995 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
7996 if (GET_CODE (XEXP (ite, 0)) == NE)
7997 mask ^= 0xf;
7999 if (mask == CC0)
8000 target = XEXP (ite, 1);
8001 else if (mask == (CC0 ^ 0xf))
8002 target = XEXP (ite, 2);
8003 else
8004 return;
8010 ei = ei_start (bb->succs);
8011 e1 = ei_safe_edge (ei);
8012 ei_next (&ei);
8013 e2 = ei_safe_edge (ei);
8015 if (e2->flags & EDGE_FALLTHRU)
8017 e2 = e1;
8018 e1 = ei_safe_edge (ei);
8021 if (!(e1->flags & EDGE_FALLTHRU))
8022 return;
8024 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
8026 if (tmp == BB_END (bb))
8027 break;
8031 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
8033 if (tend_bb != NULL)
8034 return;
8035 tend_bb = bb;
8040 /* Either we successfully remove the FPR clobbers here or we are not
8041 able to do anything for this TX.  Both cases don't qualify for
8042 another look.  */
8043 cfun->machine->tbegin_p = false;
8045 if (tbegin_bb == NULL || tend_bb == NULL)
8046 return;
8048 calculate_dominance_info (CDI_POST_DOMINATORS);
8049 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
8050 free_dominance_info (CDI_POST_DOMINATORS);
8052 if (!result)
8053 return;
8055 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
8056 gen_rtvec (2,
8057 XVECEXP (PATTERN (tbegin_insn), 0, 0),
8058 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
8059 INSN_CODE (tbegin_insn) = -1;
8060 df_insn_rescan (tbegin_insn);
8065 /* Return true if it is legal to put a value with MODE into REGNO. */
8068 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
8070 switch (REGNO_REG_CLASS (regno))
8072 case FP_REGS:
8073 if (REGNO_PAIR_OK (regno, mode))
8075 if (mode == SImode || mode == DImode)
8076 return true;
8078 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
8079 return true;
8081 break;
8082 case ADDR_REGS:
8083 if (FRAME_REGNO_P (regno) && mode == Pmode)
8084 return true;
8086 /* fallthrough */
8087 case GENERAL_REGS:
8088 if (REGNO_PAIR_OK (regno, mode))
8090 if (TARGET_ZARCH
8091 || (mode != TFmode && mode != TCmode && mode != TDmode))
8092 return true;
8094 break;
8095 case CC_REGS:
8096 if (GET_MODE_CLASS (mode) == MODE_CC)
8097 return true;
8098 break;
8099 case ACCESS_REGS:
8100 if (REGNO_PAIR_OK (regno, mode))
8102 if (mode == SImode || mode == Pmode)
8103 return true;
8105 break;
8106 default:
8107 return false;
8110 return false;
8113 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
8116 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
8118 /* Once we've decided upon a register to use as base register, it must
8119 no longer be used for any other purpose. */
8120 if (cfun->machine->base_reg)
8121 if (REGNO (cfun->machine->base_reg) == old_reg
8122 || REGNO (cfun->machine->base_reg) == new_reg)
8123 return false;
8125 /* Prevent regrename from using call-saved regs which haven't
8126 actually been saved. This is necessary since regrename assumes
8127 the backend save/restore decisions are based on
8128 df_regs_ever_live. Since we have our own routine we have to tell
8129 regrename manually about it. */
8130 if (GENERAL_REGNO_P (new_reg)
8131 && !call_really_used_regs[new_reg]
8132 && cfun_gpr_save_slot (new_reg) == 0)
8133 return false;
8135 return true;
8138 /* Return nonzero if register REGNO can be used as a scratch register
8142 s390_hard_regno_scratch_ok (unsigned int regno)
8144 /* See s390_hard_regno_rename_ok. */
8145 if (GENERAL_REGNO_P (regno)
8146 && !call_really_used_regs[regno]
8147 && cfun_gpr_save_slot (regno) == 0)
8148 return false;
8150 return true;
8153 /* Maximum number of registers to represent a value of mode MODE
8154 in a register of class RCLASS. */
8157 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
8159 switch (rclass)
8161 case FP_REGS:
8162 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8163 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
8164 else
8165 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
8166 case ACCESS_REGS:
8167 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
8168 default:
8169 break;
8171 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
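/* Editorial examples for the rules above: TFmode (16 bytes) in
   FP_REGS needs (16 + 7) / 8 == 2 registers; a complex TCmode value
   (32 bytes) needs 2 * ((32 / 2 + 7) / 8) == 4; SImode in a general
   register on a 64-bit target needs (4 + 7) / 8 == 1.  */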
8174 /* Return true if we use LRA instead of reload pass. */
8178 return s390_lra_flag;
8181 /* Return true if register FROM can be eliminated via register TO. */
8184 s390_can_eliminate (const int from, const int to)
8186 /* On zSeries machines, we have not marked the base register as fixed.
8187 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
8188 If a function requires the base register, we say here that this
8189 elimination cannot be performed. This will cause reload to free
8190 up the base register (as if it were fixed). On the other hand,
8191 if the current function does *not* require the base register, we
8192 say here the elimination succeeds, which in turn allows reload
8193 to allocate the base register for any other purpose. */
8194 if (from == BASE_REGNUM && to == BASE_REGNUM)
8196 if (TARGET_CPU_ZARCH)
8198 s390_init_frame_layout ();
8199 return cfun->machine->base_reg == NULL_RTX;
8205 /* Everything else must point into the stack frame. */
8206 gcc_assert (to == STACK_POINTER_REGNUM
8207 || to == HARD_FRAME_POINTER_REGNUM);
8209 gcc_assert (from == FRAME_POINTER_REGNUM
8210 || from == ARG_POINTER_REGNUM
8211 || from == RETURN_ADDRESS_POINTER_REGNUM);
8213 /* Make sure we actually saved the return address. */
8214 if (from == RETURN_ADDRESS_POINTER_REGNUM)
8215 if (!crtl->calls_eh_return
8216 && !cfun->stdarg
8217 && !cfun_frame_layout.save_return_addr_p)
8218 return false;
8220 return true;
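/* Editorial summary (a sketch; the authoritative pairs come from
   ELIMINABLE_REGS): FRAME_POINTER and ARG_POINTER eliminate into the
   stack or hard frame pointer, RETURN_ADDRESS_POINTER only if the
   return address really has a save slot, and the BASE_REGNUM
   self-elimination succeeds exactly when no literal pool base
   register is needed.  */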
8223 /* Return offset between register FROM and TO initially after prolog. */
8226 s390_initial_elimination_offset (int from, int to)
8228 HOST_WIDE_INT offset;
8230 /* ??? Why are we called for non-eliminable pairs? */
8231 if (!s390_can_eliminate (from, to))
8232 return 0;
8234 switch (from)
8236 case FRAME_POINTER_REGNUM:
8237 offset = (get_frame_size()
8238 + STACK_POINTER_OFFSET
8239 + crtl->outgoing_args_size);
8240 break;
8242 case ARG_POINTER_REGNUM:
8243 s390_init_frame_layout ();
8244 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
8245 break;
8247 case RETURN_ADDRESS_POINTER_REGNUM:
8248 s390_init_frame_layout ();
8250 if (cfun_frame_layout.first_save_gpr_slot == -1)
8252 /* If it turns out that for stdarg nothing went into the reg
8253 save area we also do not need the return address
8254 pointer.  */
8255 if (cfun->stdarg && !cfun_save_arg_fprs_p)
8256 return 0;
8258 gcc_unreachable ();
8261 /* In order to make the following work it is not necessary for
8262 r14 to have a save slot. It is sufficient if one other GPR
8263 got one. Since the GPRs are always stored without gaps we
8264 are able to calculate where the r14 save slot would
8265 reside.  */
8266 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
8267 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
8268 UNITS_PER_LONG);
8269 break;
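/* Editorial example of the computation above, with made-up numbers:
   frame_size == 160, gprs_offset == 48, first_save_gpr_slot == 6
   gives offset = 160 + 48 + (14 - 6) * 8 == 272 for the r14 slot on a
   64-bit target.  */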
8282 /* Emit insn to save fpr REGNUM at offset OFFSET relative
8283 to register BASE. Return generated insn. */
8286 save_fpr (rtx base, int offset, int regnum)
8289 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8291 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
8292 set_mem_alias_set (addr, get_varargs_alias_set ());
8294 set_mem_alias_set (addr, get_frame_alias_set ());
8296 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
8299 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
8300 to register BASE. Return generated insn. */
8303 restore_fpr (rtx base, int offset, int regnum)
8306 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8307 set_mem_alias_set (addr, get_frame_alias_set ());
8309 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
8312 /* Return true if REGNO is a global register, but not one
8313 of the special ones that need to be saved/restored in anyway. */
8316 global_not_special_regno_p (int regno)
8318 return (global_regs[regno]
8319 /* These registers are special and need to be
8320 restored in any case. */
8321 && !(regno == STACK_POINTER_REGNUM
8322 || regno == RETURN_REGNUM
8323 || regno == BASE_REGNUM
8324 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8327 /* Generate insn to save registers FIRST to LAST into
8328 the register save area located at offset OFFSET
8329 relative to register BASE. */
8332 save_gprs (rtx base, int offset, int first, int last)
8334 rtx addr, insn, note;
8337 addr = plus_constant (Pmode, base, offset);
8338 addr = gen_rtx_MEM (Pmode, addr);
8340 set_mem_alias_set (addr, get_frame_alias_set ());
8342 /* Special-case single register.  */
8343 if (first == last)
8345 if (TARGET_ZARCH)
8346 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8347 else
8348 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8350 if (!global_not_special_regno_p (first))
8351 RTX_FRAME_RELATED_P (insn) = 1;
8352 return insn;
8356 insn = gen_store_multiple (addr,
8357 gen_rtx_REG (Pmode, first),
8358 GEN_INT (last - first + 1));
8360 if (first <= 6 && cfun->stdarg)
8361 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8363 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8365 if (first + i <= 6)
8366 set_mem_alias_set (mem, get_varargs_alias_set ());
8369 /* We need to set the FRAME_RELATED flag on all SETs
8370 inside the store-multiple pattern.
8372 However, we must not emit DWARF records for registers 2..5
8373 if they are stored for use by variable arguments ...
8375 ??? Unfortunately, it is not enough to simply not set the
8376 FRAME_RELATED flags for those SETs, because the first SET
8377 of the PARALLEL is always treated as if it had the flag
8378 set, even if it does not.  Therefore we emit a new pattern
8379 without those registers as a REG_FRAME_RELATED_EXPR note.  */
8381 if (first >= 6 && !global_not_special_regno_p (first))
8383 rtx pat = PATTERN (insn);
8385 for (i = 0; i < XVECLEN (pat, 0); i++)
8386 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8387 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8389 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8391 RTX_FRAME_RELATED_P (insn) = 1;
8397 for (start = first >= 6 ? first : 6; start <= last; start++)
8398 if (!global_not_special_regno_p (start))
8404 addr = plus_constant (Pmode, base,
8405 offset + (start - first) * UNITS_PER_LONG);
8410 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
8411 gen_rtx_REG (Pmode, start));
8413 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
8414 gen_rtx_REG (Pmode, start));
8415 note = PATTERN (note);
8417 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8418 RTX_FRAME_RELATED_P (insn) = 1;
8423 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8424 gen_rtx_REG (Pmode, start),
8425 GEN_INT (last - start + 1));
8426 note = PATTERN (note);
8428 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8430 for (i = 0; i < XVECLEN (note, 0); i++)
8431 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8432 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8434 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8436 RTX_FRAME_RELATED_P (insn) = 1;
8438 return insn;
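/* Editorial sketch of the usual outcome (not a guaranteed encoding):
   saving r6..r15 in a 64-bit function assembles to a single store
   multiple such as

       stmg    %r6,%r15,48(%r15)

   with REG_FRAME_RELATED_EXPR notes covering only the SETs of
   registers 6 and up; the offset depends on the frame layout.  */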
8442 /* Generate insn to restore registers FIRST to LAST from
8443 the register save area located at offset OFFSET
8444 relative to register BASE. */
8447 restore_gprs (rtx base, int offset, int first, int last)
8451 addr = plus_constant (Pmode, base, offset);
8452 addr = gen_rtx_MEM (Pmode, addr);
8453 set_mem_alias_set (addr, get_frame_alias_set ());
8455 /* Special-case single register.  */
8456 if (first == last)
8458 if (TARGET_ZARCH)
8459 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8460 else
8461 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8463 RTX_FRAME_RELATED_P (insn) = 1;
8464 return insn;
8467 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8468 addr,
8469 GEN_INT (last - first + 1));
8470 RTX_FRAME_RELATED_P (insn) = 1;
8471 return insn;
8474 /* Return insn sequence to load the GOT register. */
8476 static GTY(()) rtx got_symbol;
8478 s390_load_got (void)
8482 /* We cannot use pic_offset_table_rtx here since we use this
8483 function also for non-pic if __tls_get_offset is called and in
8484 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
8485 aren't usable.  */
8486 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8488 if (!got_symbol)
8490 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8491 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8494 start_sequence ();
8496 if (TARGET_CPU_ZARCH)
8498 emit_move_insn (got_rtx, got_symbol);
8504 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8505 UNSPEC_LTREL_OFFSET);
8506 offset = gen_rtx_CONST (Pmode, offset);
8507 offset = force_const_mem (Pmode, offset);
8509 emit_move_insn (got_rtx, offset);
8511 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8512 UNSPEC_LTREL_BASE);
8513 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8515 emit_move_insn (got_rtx, offset);
8518 insns = get_insns ();
8519 end_sequence ();
8521 return insns;
8523 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8524 and the change to the stack pointer. */
8527 s390_emit_stack_tie (void)
8529 rtx mem = gen_frame_mem (BLKmode,
8530 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8532 emit_insn (gen_stack_tie (mem));
8535 /* Copy GPRS into FPR save slots. */
8538 s390_save_gprs_to_fprs (void)
8542 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8543 return;
8545 for (i = 6; i < 16; i++)
8547 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8550 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
8551 gen_rtx_REG (DImode, i));
8552 RTX_FRAME_RELATED_P (insn) = 1;
8557 /* Restore GPRs from FPR save slots. */
8560 s390_restore_gprs_from_fprs (void)
8564 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8565 return;
8567 for (i = 6; i < 16; i++)
8569 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8572 emit_move_insn (gen_rtx_REG (DImode, i),
8573 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
8574 df_set_regs_ever_live (i, true);
8575 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
8576 if (i == STACK_POINTER_REGNUM)
8577 add_reg_note (insn, REG_CFA_DEF_CFA,
8578 plus_constant (Pmode, stack_pointer_rtx,
8579 STACK_POINTER_OFFSET));
8580 RTX_FRAME_RELATED_P (insn) = 1;
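/* Editorial note: the register-to-register moves above rely on the
   z10 ldgr/lgdr instructions, e.g. a hypothetical pairing
   "ldgr %f0,%r14" in the prologue restored here by "lgdr %r14,%f0",
   which keeps leaf functions free of stack accesses for their GPR
   saves.  */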
8586 /* A pass run immediately before shrink-wrapping and prologue and epilogue
8591 const pass_data pass_data_s390_early_mach =
8593 RTL_PASS, /* type */
8594 "early_mach", /* name */
8595 OPTGROUP_NONE, /* optinfo_flags */
8596 TV_MACH_DEP, /* tv_id */
8597 0, /* properties_required */
8598 0, /* properties_provided */
8599 0, /* properties_destroyed */
8600 0, /* todo_flags_start */
8601 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
8604 class pass_s390_early_mach : public rtl_opt_pass
8607 pass_s390_early_mach (gcc::context *ctxt)
8608 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
8611 /* opt_pass methods: */
8612 virtual unsigned int execute (function *);
8614 }; // class pass_s390_early_mach
8617 pass_s390_early_mach::execute (function *fun)
8621 /* Try to get rid of the FPR clobbers. */
8622 s390_optimize_nonescaping_tx ();
8624 /* Re-compute register info. */
8625 s390_register_info ();
8627 /* If we're using a base register, ensure that it is always valid for
8628 the first non-prologue instruction. */
8629 if (fun->machine->base_reg)
8630 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
8632 /* Annotate all constant pool references to let the scheduler know
8633 they implicitly use the base register. */
8634 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8637 annotate_constant_pool_refs (&PATTERN (insn));
8638 df_insn_rescan (insn);
8645 /* Expand the prologue into a bunch of separate insns. */
8648 s390_emit_prologue (void)
8656 /* Choose best register to use for temp use within prologue.
8657 See below for why TPF must use the register 1. */
8659 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8660 && !crtl->is_leaf
8661 && !TARGET_TPF_PROFILING)
8662 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8663 else
8664 temp_reg = gen_rtx_REG (Pmode, 1);
8666 s390_save_gprs_to_fprs ();
8668 /* Save call saved gprs. */
8669 if (cfun_frame_layout.first_save_gpr != -1)
8671 insn = save_gprs (stack_pointer_rtx,
8672 cfun_frame_layout.gprs_offset +
8673 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8674 - cfun_frame_layout.first_save_gpr_slot),
8675 cfun_frame_layout.first_save_gpr,
8676 cfun_frame_layout.last_save_gpr);
8677 emit_insn (insn);
8680 /* Dummy insn to mark literal pool slot. */
8682 if (cfun->machine->base_reg)
8683 emit_insn (gen_main_pool (cfun->machine->base_reg));
8685 offset = cfun_frame_layout.f0_offset;
8687 /* Save f0 and f2. */
8688 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
8690 if (cfun_fpr_save_p (i))
8692 save_fpr (stack_pointer_rtx, offset, i);
8693 offset += 8;
8695 else if (!TARGET_PACKED_STACK || cfun->stdarg)
8696 offset += 8;
8699 /* Save f4 and f6. */
8700 offset = cfun_frame_layout.f4_offset;
8701 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
8703 if (cfun_fpr_save_p (i))
8705 insn = save_fpr (stack_pointer_rtx, offset, i);
8706 offset += 8;
8708 /* If f4 and f6 are call clobbered they are saved due to
8709 stdargs and therefore are not frame related.  */
8710 if (!call_really_used_regs[i])
8711 RTX_FRAME_RELATED_P (insn) = 1;
8713 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
8714 offset += 8;
8717 if (TARGET_PACKED_STACK
8718 && cfun_save_high_fprs_p
8719 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8721 offset = (cfun_frame_layout.f8_offset
8722 + (cfun_frame_layout.high_fprs - 1) * 8);
8724 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
8725 if (cfun_fpr_save_p (i))
8727 insn = save_fpr (stack_pointer_rtx, offset, i);
8729 RTX_FRAME_RELATED_P (insn) = 1;
8730 offset -= 8;
8732 if (offset >= cfun_frame_layout.f8_offset)
8733 next_fpr = i;
8736 if (!TARGET_PACKED_STACK)
8737 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
8739 if (flag_stack_usage_info)
8740 current_function_static_stack_size = cfun_frame_layout.frame_size;
8742 /* Decrement stack pointer. */
8744 if (cfun_frame_layout.frame_size > 0)
8746 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8749 if (s390_stack_size)
8751 HOST_WIDE_INT stack_guard;
8753 if (s390_stack_guard)
8754 stack_guard = s390_stack_guard;
8755 else
8757 /* If no value for stack guard is provided the smallest power of 2
8758 larger than the current frame size is chosen.  */
8759 stack_guard = 1;
8760 while (stack_guard < cfun_frame_layout.frame_size)
8761 stack_guard <<= 1;
8764 if (cfun_frame_layout.frame_size >= s390_stack_size)
8766 warning (0, "frame size of function %qs is %wd"
8767 " bytes exceeding user provided stack limit of "
8769 "An unconditional trap is added.",
8770 current_function_name(), cfun_frame_layout.frame_size,
8772 emit_insn (gen_trap ());
8776 /* stack_guard has to be smaller than s390_stack_size.
8777 Otherwise we would emit an AND with zero which would
8778 not match the test under mask pattern. */
8779 if (stack_guard >= s390_stack_size)
8781 warning (0, "frame size of function %qs is %wd"
8782 " bytes which is more than half the stack size. "
8783 "The dynamic check would not be reliable. "
8784 "No check emitted for this function.",
8785 current_function_name(),
8786 cfun_frame_layout.frame_size);
8790 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8791 & ~(stack_guard - 1));
8793 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8794 GEN_INT (stack_check_mask));
8795 if (TARGET_64BIT)
8796 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8797 t, const0_rtx),
8798 t, const0_rtx, const0_rtx));
8799 else
8800 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8801 t, const0_rtx),
8802 t, const0_rtx, const0_rtx));
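/* Editorial arithmetic: with -mstack-size=65536 and -mstack-guard=4096
   the mask above is (65536 - 1) & ~(4096 - 1) == 0xf000; the
   conditional trap triggers when those bits of the new stack pointer
   are all zero, i.e. when the remaining headroom drops below the
   guard size.  */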
8807 if (s390_warn_framesize > 0
8808 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8809 warning (0, "frame size of %qs is %wd bytes",
8810 current_function_name (), cfun_frame_layout.frame_size);
8812 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8813 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8815 /* Save incoming stack pointer into temp reg. */
8816 if (TARGET_BACKCHAIN || next_fpr)
8817 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8819 /* Subtract frame size from stack pointer. */
8821 if (DISP_IN_RANGE (INTVAL (frame_off)))
8823 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8824 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8826 insn = emit_insn (insn);
8830 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8831 frame_off = force_const_mem (Pmode, frame_off);
8833 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8834 annotate_constant_pool_refs (&PATTERN (insn));
8837 RTX_FRAME_RELATED_P (insn) = 1;
8838 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8839 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8840 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8841 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8842 real_frame_off)));
8844 /* Set backchain. */
8846 if (TARGET_BACKCHAIN)
8848 if (cfun_frame_layout.backchain_offset)
8849 addr = gen_rtx_MEM (Pmode,
8850 plus_constant (Pmode, stack_pointer_rtx,
8851 cfun_frame_layout.backchain_offset));
8852 else
8853 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8854 set_mem_alias_set (addr, get_frame_alias_set ());
8855 insn = emit_insn (gen_move_insn (addr, temp_reg));
8858 /* If we support non-call exceptions (e.g. for Java),
8859 we need to make sure the backchain pointer is set up
8860 before any possibly trapping memory access. */
8861 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8863 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8864 emit_clobber (addr);
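/* Editorial sketch of the resulting 64-bit code (default layout,
   illustrative offsets only):

       lgr     %r1,%r15         # incoming sp into temp_reg
       aghi    %r15,-160        # decrement stack pointer
       stg     %r1,0(%r15)      # set backchain

   With -mpacked-stack the backchain slot sits at the top of the
   frame instead of offset 0.  */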
8868 /* Save fprs 8 - 15 (64 bit ABI). */
8870 if (cfun_save_high_fprs_p && next_fpr)
8872 /* If the stack might be accessed through a different register
8873 we have to make sure that the stack pointer decrement is not
8874 moved below the use of the stack slots. */
8875 s390_emit_stack_tie ();
8877 insn = emit_insn (gen_add2_insn (temp_reg,
8878 GEN_INT (cfun_frame_layout.f8_offset)));
8882 for (i = FPR8_REGNUM; i <= next_fpr; i++)
8883 if (cfun_fpr_save_p (i))
8885 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8886 cfun_frame_layout.frame_size
8887 + cfun_frame_layout.f8_offset
8888 + offset);
8890 insn = save_fpr (temp_reg, offset, i);
8891 offset += 8;
8892 RTX_FRAME_RELATED_P (insn) = 1;
8893 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8894 gen_rtx_SET (VOIDmode,
8895 gen_rtx_MEM (DFmode, addr),
8896 gen_rtx_REG (DFmode, i)));
8900 /* Set frame pointer, if needed. */
8902 if (frame_pointer_needed)
8904 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8905 RTX_FRAME_RELATED_P (insn) = 1;
8908 /* Set up got pointer, if needed. */
8910 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8912 rtx_insn *insns = s390_load_got ();
8914 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
8915 annotate_constant_pool_refs (&PATTERN (insn));
8917 emit_insn (insns);
8920 if (TARGET_TPF_PROFILING)
8922 /* Generate a BAS instruction to serve as a function
8923 entry intercept to facilitate the use of tracing
8924 algorithms located at the branch target. */
8925 emit_insn (gen_prologue_tpf ());
8927 /* Emit a blockage here so that all code
8928 lies between the profiling mechanisms. */
8929 emit_insn (gen_blockage ());
8933 /* Expand the epilogue into a bunch of separate insns. */
8936 s390_emit_epilogue (bool sibcall)
8938 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8939 int area_bottom, area_top, offset = 0;
8944 if (TARGET_TPF_PROFILING)
8947 /* Generate a BAS instruction to serve as a function
8948 entry intercept to facilitate the use of tracing
8949 algorithms located at the branch target. */
8951 /* Emit a blockage here so that all code
8952 lies between the profiling mechanisms. */
8953 emit_insn (gen_blockage ());
8955 emit_insn (gen_epilogue_tpf ());
8958 /* Check whether to use frame or stack pointer for restore. */
8960 frame_pointer = (frame_pointer_needed
8961 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8963 s390_frame_area (&area_bottom, &area_top);
8965 /* Check whether we can access the register save area.
8966 If not, increment the frame pointer as required. */
8968 if (area_top <= area_bottom)
8970 /* Nothing to restore. */
8972 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8973 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8975 /* Area is in range. */
8976 offset = cfun_frame_layout.frame_size;
8980 rtx insn, frame_off, cfa;
8982 offset = area_bottom < 0 ? -area_bottom : 0;
8983 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8985 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8986 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8987 if (DISP_IN_RANGE (INTVAL (frame_off)))
8989 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8990 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8991 insn = emit_insn (insn);
8995 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8996 frame_off = force_const_mem (Pmode, frame_off);
8998 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8999 annotate_constant_pool_refs (&PATTERN (insn));
9001 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
9002 RTX_FRAME_RELATED_P (insn) = 1;
9005 /* Restore call saved fprs. */
9009 if (cfun_save_high_fprs_p)
9011 next_offset = cfun_frame_layout.f8_offset;
9012 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
9014 if (cfun_fpr_save_p (i))
9016 restore_fpr (frame_pointer,
9017 offset + next_offset, i);
9018 cfa_restores
9019 = alloc_reg_note (REG_CFA_RESTORE,
9020 gen_rtx_REG (DFmode, i), cfa_restores);
9021 next_offset += 8;
9029 next_offset = cfun_frame_layout.f4_offset;
9031 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9033 if (cfun_fpr_save_p (i))
9035 restore_fpr (frame_pointer,
9036 offset + next_offset, i);
9037 cfa_restores
9038 = alloc_reg_note (REG_CFA_RESTORE,
9039 gen_rtx_REG (DFmode, i), cfa_restores);
9040 next_offset += 8;
9042 else if (!TARGET_PACKED_STACK)
9043 next_offset += 8;
9048 /* Return register. */
9050 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9052 /* Restore call saved gprs. */
9054 if (cfun_frame_layout.first_restore_gpr != -1)
9059 /* Check for global registers and save them
9060 to the stack location from where they get restored.  */
9062 for (i = cfun_frame_layout.first_restore_gpr;
9063 i <= cfun_frame_layout.last_restore_gpr;
9064 i++)
9066 if (global_not_special_regno_p (i))
9068 addr = plus_constant (Pmode, frame_pointer,
9069 offset + cfun_frame_layout.gprs_offset
9070 + (i - cfun_frame_layout.first_save_gpr_slot)
9071 * UNITS_PER_LONG);
9072 addr = gen_rtx_MEM (Pmode, addr);
9073 set_mem_alias_set (addr, get_frame_alias_set ());
9074 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
9078 = alloc_reg_note (REG_CFA_RESTORE,
9079 gen_rtx_REG (Pmode, i), cfa_restores);
9084 /* Fetch return address from stack before load multiple,
9085 this will do good for scheduling. */
9087 if (cfun_frame_layout.save_return_addr_p
9088 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
9089 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
9091 int return_regnum = find_unused_clobbered_reg();
9092 if (!return_regnum)
9093 return_regnum = 4;
9094 return_reg = gen_rtx_REG (Pmode, return_regnum);
9096 addr = plus_constant (Pmode, frame_pointer,
9097 offset + cfun_frame_layout.gprs_offset
9098 + (RETURN_REGNUM
9099 - cfun_frame_layout.first_save_gpr_slot)
9100 * UNITS_PER_LONG);
9101 addr = gen_rtx_MEM (Pmode, addr);
9102 set_mem_alias_set (addr, get_frame_alias_set ());
9103 emit_move_insn (return_reg, addr);
9107 insn = restore_gprs (frame_pointer,
9108 offset + cfun_frame_layout.gprs_offset
9109 + (cfun_frame_layout.first_restore_gpr
9110 - cfun_frame_layout.first_save_gpr_slot)
9111 * UNITS_PER_LONG,
9112 cfun_frame_layout.first_restore_gpr,
9113 cfun_frame_layout.last_restore_gpr);
9114 insn = emit_insn (insn);
9115 REG_NOTES (insn) = cfa_restores;
9116 add_reg_note (insn, REG_CFA_DEF_CFA,
9117 plus_constant (Pmode, stack_pointer_rtx,
9118 STACK_POINTER_OFFSET));
9119 RTX_FRAME_RELATED_P (insn) = 1;
9122 s390_restore_gprs_from_fprs ();
9127 /* Return to caller. */
9129 p = rtvec_alloc (2);
9131 RTVEC_ELT (p, 0) = ret_rtx;
9132 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
9133 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
9137 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
9140 s300_set_up_by_prologue (hard_reg_set_container *regs)
9142 if (cfun->machine->base_reg
9143 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
9144 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
9147 /* Return true if the function can use simple_return to return outside
9148 of a shrink-wrapped region.  At present shrink-wrapping is supported
9149 in all cases.  */
9151 bool
9152 s390_can_use_simple_return_insn (void)
9154 return true;
9157 /* Return true if the epilogue is guaranteed to contain only a return
9158 instruction and if a direct return can therefore be used instead.
9159 One of the main advantages of using direct return instructions
9160 is that we can then use conditional returns. */
9163 s390_can_use_return_insn (void)
9167 if (!reload_completed)
9168 return false;
9170 if (crtl->profile)
9171 return false;
9173 if (TARGET_TPF_PROFILING)
9174 return false;
9176 for (i = 0; i < 16; i++)
9177 if (cfun_gpr_save_slot (i))
9178 return false;
9180 /* For 31 bit this is not covered by the frame_size check below
9181 since f4, f6 are saved in the register save area without needing
9182 additional stack space.  */
9183 if (!TARGET_64BIT
9184 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
9185 return false;
9187 if (cfun->machine->base_reg
9188 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
9189 return false;
9191 return cfun_frame_layout.frame_size == 0;
9194 /* Return the size in bytes of a function argument of
9195 type TYPE and/or mode MODE. At least one of TYPE or
9196 MODE must be specified. */
9199 s390_function_arg_size (enum machine_mode mode, const_tree type)
9201 if (type)
9202 return int_size_in_bytes (type);
9204 /* No type info available for some library calls ... */
9205 if (mode != BLKmode)
9206 return GET_MODE_SIZE (mode);
9208 /* If we have neither type nor mode, abort.  */
9209 gcc_unreachable ();
9212 /* Return true if a function argument of type TYPE and mode MODE
9213 is to be passed in a floating-point register, if available. */
9216 s390_function_arg_float (enum machine_mode mode, const_tree type)
9218 int size = s390_function_arg_size (mode, type);
9219 if (size > 8)
9220 return false;
9222 /* Soft-float changes the ABI: no floating-point registers are used. */
9223 if (TARGET_SOFT_FLOAT)
9224 return false;
9226 /* No type info available for some library calls ...  */
9227 if (!type)
9228 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
9230 /* The ABI says that record types with a single member are treated
9231 just like that member would be. */
9232 while (TREE_CODE (type) == RECORD_TYPE)
9234 tree field, single = NULL_TREE;
9236 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9238 if (TREE_CODE (field) != FIELD_DECL)
9239 continue;
9241 if (single == NULL_TREE)
9242 single = TREE_TYPE (field);
9243 else
9244 return false;
9247 if (single == NULL_TREE)
9248 return false;
9249 else
9250 type = single;
9253 return TREE_CODE (type) == REAL_TYPE;
9256 /* Return true if a function argument of type TYPE and mode MODE
9257 is to be passed in an integer register, or a pair of integer
9258 registers, if available. */
9261 s390_function_arg_integer (enum machine_mode mode, const_tree type)
9263 int size = s390_function_arg_size (mode, type);
9264 if (size > 8)
9265 return false;
9267 /* No type info available for some library calls ...  */
9268 if (!type)
9269 return GET_MODE_CLASS (mode) == MODE_INT
9270 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
9272 /* We accept small integral (and similar) types. */
9273 if (INTEGRAL_TYPE_P (type)
9274 || POINTER_TYPE_P (type)
9275 || TREE_CODE (type) == NULLPTR_TYPE
9276 || TREE_CODE (type) == OFFSET_TYPE
9277 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
9278 return true;
9280 /* We also accept structs of size 1, 2, 4, 8 that are not
9281 passed in floating-point registers. */
9282 if (AGGREGATE_TYPE_P (type)
9283 && exact_log2 (size) >= 0
9284 && !s390_function_arg_float (mode, type))
9285 return true;
9287 return false;
9290 /* Return 1 if a function argument of type TYPE and mode MODE
9291 is to be passed by reference. The ABI specifies that only
9292 structures of size 1, 2, 4, or 8 bytes are passed by value,
9293 all other structures (and complex numbers) are passed by
9294 reference.  */
9296 static bool
9297 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
9298 enum machine_mode mode, const_tree type,
9299 bool named ATTRIBUTE_UNUSED)
9301 int size = s390_function_arg_size (mode, type);
9302 if (size > 8)
9303 return true;
9305 if (type)
9307 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
9308 return 1;
9310 if (TREE_CODE (type) == COMPLEX_TYPE
9311 || TREE_CODE (type) == VECTOR_TYPE)
9312 return 1;
9315 return 0;
9318 /* Update the data in CUM to advance over an argument of mode MODE and
9319 data type TYPE. (TYPE is null for libcalls where that information
9320 may not be available.). The boolean NAMED specifies whether the
9321 argument is a named argument (as opposed to an unnamed argument
9322 matching an ellipsis). */
9325 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9326 const_tree type, bool named ATTRIBUTE_UNUSED)
9328 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9330 if (s390_function_arg_float (mode, type))
9332 cum->fprs += 1;
9334 else if (s390_function_arg_integer (mode, type))
9336 int size = s390_function_arg_size (mode, type);
9337 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
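/* Editorial example: for f (int a, double b, struct twelve s), where
   "struct twelve" is a hypothetical 12-byte aggregate, the advances
   above produce gprs == 1 after A, fprs == 1 after B, and gprs == 2
   after S, since the non-power-of-two struct is passed by reference
   and only its address occupies a GPR.  */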
9343 /* Define where to put the arguments to a function.
9344 Value is zero to push the argument on the stack,
9345 or a hard register in which to store the argument.
9347 MODE is the argument's machine mode.
9348 TYPE is the data type of the argument (as a tree).
9349 This is null for libcalls where that information may
9350 not be available.
9351 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9352 the preceding args and about the function being called.
9353 NAMED is nonzero if this argument is a named parameter
9354 (otherwise it is an extra parameter matching an ellipsis).
9356 On S/390, we use general purpose registers 2 through 6 to
9357 pass integer, pointer, and certain structure arguments, and
9358 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
9359 to pass floating point arguments. All remaining arguments
9360 are pushed to the stack. */
9363 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9364 const_tree type, bool named ATTRIBUTE_UNUSED)
9366 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9368 if (s390_function_arg_float (mode, type))
9370 if (cum->fprs + 1 > FP_ARG_NUM_REG)
9371 return 0;
9372 else
9373 return gen_rtx_REG (mode, cum->fprs + 16);
9375 else if (s390_function_arg_integer (mode, type))
9377 int size = s390_function_arg_size (mode, type);
9378 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9380 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
9381 return 0;
9382 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
9383 return gen_rtx_REG (mode, cum->gprs + 2);
9384 else if (n_gprs == 2)
9386 rtvec p = rtvec_alloc (2);
9388 RTVEC_ELT (p, 0)
9389 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
9390 const0_rtx);
9391 RTVEC_ELT (p, 1)
9392 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
9393 GEN_INT (4));
9395 return gen_rtx_PARALLEL (mode, p);
9399 /* After the real arguments, expand_call calls us once again
9400 with a void_type_node type. Whatever we return here is
9401 passed as operand 2 to the call expanders.
9403 We don't need this feature ... */
9404 else if (type == void_type_node)
9405 return const0_rtx;
9407 gcc_unreachable ();
9410 /* Return true if return values of type TYPE should be returned
9411 in a memory buffer whose address is passed by the caller as
9412 hidden first argument. */
9415 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
9417 /* We accept small integral (and similar) types. */
9418 if (INTEGRAL_TYPE_P (type)
9419 || POINTER_TYPE_P (type)
9420 || TREE_CODE (type) == OFFSET_TYPE
9421 || TREE_CODE (type) == REAL_TYPE)
9422 return int_size_in_bytes (type) > 8;
9424 /* Aggregates and similar constructs are always returned
9425 in memory.  */
9426 if (AGGREGATE_TYPE_P (type)
9427 || TREE_CODE (type) == COMPLEX_TYPE
9428 || TREE_CODE (type) == VECTOR_TYPE)
9431 /* ??? We get called on all sorts of random stuff from
9432 aggregate_value_p. We can't abort, but it's not clear
9433 what's safe to return. Pretend it's a struct I guess. */
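/* Illustrative examples (assuming a 64-bit target): int, long and
   double are returned in registers, while __int128, long double and
   any struct, complex or vector type is returned via the hidden
   memory buffer.  */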
9437 /* Function arguments and return values are promoted to word size. */
9439 static enum machine_mode
9440 s390_promote_function_mode (const_tree type, enum machine_mode mode,
9442 const_tree fntype ATTRIBUTE_UNUSED,
9443 int for_return ATTRIBUTE_UNUSED)
9445 if (INTEGRAL_MODE_P (mode)
9446 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
9448 if (type != NULL_TREE && POINTER_TYPE_P (type))
9449 *punsignedp = POINTERS_EXTEND_UNSIGNED;
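/* A sketch of the effect (assuming UNITS_PER_LONG == 8): a 32-bit
   int argument or return value is widened to DImode, keeping the
   signedness of its type, while pointers are always extended as
   unsigned values.  */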
9456 /* Define where to return a (scalar) value of type RET_TYPE.
9457 If RET_TYPE is null, define where to return a (scalar)
9458 value of mode MODE from a libcall. */
9461 s390_function_and_libcall_value (enum machine_mode mode,
9462 const_tree ret_type,
9463 const_tree fntype_or_decl,
9464 bool outgoing ATTRIBUTE_UNUSED)
9466 /* For normal functions perform the promotion as
9467 promote_function_mode would do. */
9470 int unsignedp = TYPE_UNSIGNED (ret_type);
9471 mode = promote_function_mode (ret_type, mode, &unsignedp,
9475 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
9476 gcc_assert (GET_MODE_SIZE (mode) <= 8);
9478 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
9479 return gen_rtx_REG (mode, 16);
9480 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
9481 || UNITS_PER_LONG == UNITS_PER_WORD)
9482 return gen_rtx_REG (mode, 2);
9483 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
9485 /* This case is triggered when returning a 64 bit value with
9486 -m31 -mzarch. Although the value would fit into a single
9487 register it has to be forced into a 32 bit register pair in
9488 order to match the ABI. */
9489 rtvec p = rtvec_alloc (2);
9492 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
9494 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
9496 return gen_rtx_PARALLEL (mode, p);
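/* Summary sketch of the cases above: a long returned under -m64
   arrives in %r2, a double in %f0 (hard register 16), and a 64-bit
   value under -m31 -mzarch in the %r2/%r3 pair built here.  */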
9502 /* Define where to return a scalar return value of type RET_TYPE. */
9505 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9508 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9509 fn_decl_or_type, outgoing);
9512 /* Define where to return a scalar libcall return value of mode
9516 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9518 return s390_function_and_libcall_value (mode, NULL_TREE,
9523 /* Create and return the va_list datatype.
9525 On S/390, va_list is an array type equivalent to
9527 typedef struct __va_list_tag
9531 void *__overflow_arg_area;
9532 void *__reg_save_area;
9535 where __gpr and __fpr hold the number of general purpose
9536 or floating point arguments used up to now, respectively,
9537 __overflow_arg_area points to the stack location of the
9538 next argument passed on the stack, and __reg_save_area
9539 always points to the start of the register area in the
9540 call frame of the current function. The function prologue
9541 saves all registers used for argument passing into this
9542 area if the function uses variable arguments. */
9545 s390_build_builtin_va_list (void)
9547 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9549 record = lang_hooks.types.make_type (RECORD_TYPE);
9552 build_decl (BUILTINS_LOCATION,
9553 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9555 f_gpr = build_decl (BUILTINS_LOCATION,
9556 FIELD_DECL, get_identifier ("__gpr"),
9557 long_integer_type_node);
9558 f_fpr = build_decl (BUILTINS_LOCATION,
9559 FIELD_DECL, get_identifier ("__fpr"),
9560 long_integer_type_node);
9561 f_ovf = build_decl (BUILTINS_LOCATION,
9562 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9564 f_sav = build_decl (BUILTINS_LOCATION,
9565 FIELD_DECL, get_identifier ("__reg_save_area"),
9568 va_list_gpr_counter_field = f_gpr;
9569 va_list_fpr_counter_field = f_fpr;
9571 DECL_FIELD_CONTEXT (f_gpr) = record;
9572 DECL_FIELD_CONTEXT (f_fpr) = record;
9573 DECL_FIELD_CONTEXT (f_ovf) = record;
9574 DECL_FIELD_CONTEXT (f_sav) = record;
9576 TYPE_STUB_DECL (record) = type_decl;
9577 TYPE_NAME (record) = type_decl;
9578 TYPE_FIELDS (record) = f_gpr;
9579 DECL_CHAIN (f_gpr) = f_fpr;
9580 DECL_CHAIN (f_fpr) = f_ovf;
9581 DECL_CHAIN (f_ovf) = f_sav;
9583 layout_type (record);
9585 /* The correct type is an array type of one element. */
9586 return build_array_type (record, build_index_type (size_zero_node));
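/* Hypothetical user code exercising this layout (illustrative only):

     void vsum (int n, ...)
     {
       va_list ap;           // array of one struct __va_list_tag
       va_start (ap, n);     // __gpr = 1 (n consumed %r2), __fpr = 0
       ...
       va_end (ap);
     }

   The counters are filled in by s390_va_start below from the
   CUMULATIVE_ARGS data recorded for the named arguments.  */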
9589 /* Implement va_start by filling the va_list structure VALIST.
9590 STDARG_P is always true, and ignored.
9591 NEXTARG points to the first anonymous stack argument.
9593 The following global variables are used to initialize
9594 the va_list structure:
9597 holds number of gprs and fprs used for named arguments.
9598 crtl->args.arg_offset_rtx:
9599 holds the offset of the first anonymous stack argument
9600 (relative to the virtual arg pointer). */
9603 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9605 HOST_WIDE_INT n_gpr, n_fpr;
9607 tree f_gpr, f_fpr, f_ovf, f_sav;
9608 tree gpr, fpr, ovf, sav, t;
9610 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9611 f_fpr = DECL_CHAIN (f_gpr);
9612 f_ovf = DECL_CHAIN (f_fpr);
9613 f_sav = DECL_CHAIN (f_ovf);
9615 valist = build_simple_mem_ref (valist);
9616 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9617 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9618 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9619 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9621 /* Count number of gp and fp argument registers used. */
9623 n_gpr = crtl->args.info.gprs;
9624 n_fpr = crtl->args.info.fprs;
9626 if (cfun->va_list_gpr_size)
9628 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9629 build_int_cst (NULL_TREE, n_gpr));
9630 TREE_SIDE_EFFECTS (t) = 1;
9631 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9634 if (cfun->va_list_fpr_size)
9636 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9637 build_int_cst (NULL_TREE, n_fpr));
9638 TREE_SIDE_EFFECTS (t) = 1;
9639 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9642 /* Find the overflow area. */
9643 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9644 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9646 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9648 off = INTVAL (crtl->args.arg_offset_rtx);
9649 off = off < 0 ? 0 : off;
9650 if (TARGET_DEBUG_ARG)
9651 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9652 (int)n_gpr, (int)n_fpr, off);
9654 t = fold_build_pointer_plus_hwi (t, off);
9656 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9657 TREE_SIDE_EFFECTS (t) = 1;
9658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9661 /* Find the register save area. */
9662 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9663 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9665 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9666 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9668 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9669 TREE_SIDE_EFFECTS (t) = 1;
9670 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9674 /* Implement va_arg by updating the va_list structure
9675 VALIST as required to retrieve an argument of type
9676 TYPE, and returning that argument.
9678 Generates code equivalent to:
9680 if (integral value) {
9681 if (size <= 4 && args.gpr < 5 ||
9682 size > 4 && args.gpr < 4 )
9683 ret = args.reg_save_area[args.gpr+8]
9685 ret = *args.overflow_arg_area++;
9686 } else if (float value) {
9688 ret = args.reg_save_area[args.fpr+64]
9690 ret = *args.overflow_arg_area++;
9691 } else if (aggregate value) {
9693 ret = *args.reg_save_area[args.gpr]
9695 ret = **args.overflow_arg_area++;
9699 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9700 gimple_seq *post_p ATTRIBUTE_UNUSED)
9702 tree f_gpr, f_fpr, f_ovf, f_sav;
9703 tree gpr, fpr, ovf, sav, reg, t, u;
9704 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9705 tree lab_false, lab_over, addr;
9707 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9708 f_fpr = DECL_CHAIN (f_gpr);
9709 f_ovf = DECL_CHAIN (f_fpr);
9710 f_sav = DECL_CHAIN (f_ovf);
9712 valist = build_va_arg_indirect_ref (valist);
9713 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9714 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9715 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9717 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9718 both appear on a lhs. */
9719 valist = unshare_expr (valist);
9720 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9722 size = int_size_in_bytes (type);
9724 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9726 if (TARGET_DEBUG_ARG)
9728 fprintf (stderr, "va_arg: aggregate type");
9732 /* Aggregates are passed by reference. */
9737 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9738 will be added by s390_frame_info, because for varargs an even
9739 number of GPRs always has to be saved (r15-r2 = 14 regs). */
9740 sav_ofs = 2 * UNITS_PER_LONG;
9741 sav_scale = UNITS_PER_LONG;
9742 size = UNITS_PER_LONG;
9743 max_reg = GP_ARG_NUM_REG - n_reg;
9745 else if (s390_function_arg_float (TYPE_MODE (type), type))
9747 if (TARGET_DEBUG_ARG)
9749 fprintf (stderr, "va_arg: float type");
9753 /* FP args go in FP registers, if present. */
9757 sav_ofs = 16 * UNITS_PER_LONG;
9759 max_reg = FP_ARG_NUM_REG - n_reg;
9763 if (TARGET_DEBUG_ARG)
9765 fprintf (stderr, "va_arg: other type");
9769 /* Otherwise into GP registers. */
9772 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9774 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9775 will be added by s390_frame_info, because for varargs an even
9776 number of GPRs always has to be saved (r15-r2 = 14 regs). */
9777 sav_ofs = 2 * UNITS_PER_LONG;
9779 if (size < UNITS_PER_LONG)
9780 sav_ofs += UNITS_PER_LONG - size;
9782 sav_scale = UNITS_PER_LONG;
9783 max_reg = GP_ARG_NUM_REG - n_reg;
9786 /* Pull the value out of the saved registers ... */
9788 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9789 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9790 addr = create_tmp_var (ptr_type_node, "addr");
9792 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9793 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9794 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9795 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9796 gimplify_and_add (t, pre_p);
9798 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9799 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9800 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9801 t = fold_build_pointer_plus (t, u);
9803 gimplify_assign (addr, t, pre_p);
9805 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9807 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9810 /* ... Otherwise out of the overflow area. */
9813 if (size < UNITS_PER_LONG)
9814 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9816 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9818 gimplify_assign (addr, t, pre_p);
9820 t = fold_build_pointer_plus_hwi (t, size);
9821 gimplify_assign (ovf, t, pre_p);
9823 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9826 /* Increment register save count. */
9828 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9829 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9830 gimplify_and_add (u, pre_p);
9834 t = build_pointer_type_for_mode (build_pointer_type (type),
9836 addr = fold_convert (t, addr);
9837 addr = build_va_arg_indirect_ref (addr);
9841 t = build_pointer_type_for_mode (type, ptr_mode, true);
9842 addr = fold_convert (t, addr);
9845 return build_va_arg_indirect_ref (addr);
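/* Worked example (a sketch, assuming 64 bit and n_reg == 1): for
   va_arg (ap, double) the code above branches to the overflow path
   once __fpr > FP_ARG_NUM_REG - 1; on the register path it computes

     addr = __reg_save_area + 16 * UNITS_PER_LONG + __fpr * UNITS_PER_LONG;

   since the FPR save slots start 16 words into the save area.  */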
9848 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) instruction.
9850 DEST - Register location where CC will be stored.
9851 TDB - Pointer to a 256 byte area where to store the transaction
9852 diagnostic block. NULL if TDB is not needed.
9853 RETRY - Retry count value. If non-NULL a retry loop for CC2 is generated.
9855 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
9856 of the tbegin instruction pattern. */
9859 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
9861 rtx retry_plus_two = gen_reg_rtx (SImode);
9862 rtx retry_reg = gen_reg_rtx (SImode);
9863 rtx_code_label *retry_label = NULL;
9865 if (retry != NULL_RTX)
9867 emit_move_insn (retry_reg, retry);
9868 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
9869 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
9870 retry_label = gen_label_rtx ();
9871 emit_label (retry_label);
9875 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
9877 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
9880 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
9881 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
9884 if (retry != NULL_RTX)
9886 const int CC0 = 1 << 3;
9887 const int CC1 = 1 << 2;
9888 const int CC3 = 1 << 0;
9890 rtx count = gen_reg_rtx (SImode);
9891 rtx_code_label *leave_label = gen_label_rtx ();
9893 /* Exit for success and permanent failures. */
9894 jump = s390_emit_jump (leave_label,
9895 gen_rtx_EQ (VOIDmode,
9896 gen_rtx_REG (CCRAWmode, CC_REGNUM),
9897 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
9898 LABEL_NUSES (leave_label) = 1;
9900 /* CC2 - transient failure. Perform retry with ppa. */
9901 emit_move_insn (count, retry_plus_two);
9902 emit_insn (gen_subsi3 (count, count, retry_reg));
9903 emit_insn (gen_tx_assist (count));
9904 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
9907 JUMP_LABEL (jump) = retry_label;
9908 LABEL_NUSES (retry_label) = 1;
9909 emit_label (leave_label);
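/* Pseudo-code sketch of the retry expansion above (register names
   illustrative):

     retry_reg = RETRY + 1;  retry_plus_two = RETRY + 2;
   retry:
     TBEGIN
     if (cc0 | cc1 | cc3) goto leave;          // success or permanent failure
     tx_assist (retry_plus_two - retry_reg);   // aborts seen so far
     if (--retry_reg != 0) goto retry;         // doloop on transient CC2
   leave:
*/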
9917 S390_BUILTIN_TBEGIN,
9918 S390_BUILTIN_TBEGIN_NOFLOAT,
9919 S390_BUILTIN_TBEGIN_RETRY,
9920 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9921 S390_BUILTIN_TBEGINC,
9923 S390_BUILTIN_TABORT,
9924 S390_BUILTIN_NON_TX_STORE,
9925 S390_BUILTIN_TX_NESTING_DEPTH,
9926 S390_BUILTIN_TX_ASSIST,
9931 static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
9933 CODE_FOR_tbegin_nofloat,
9934 CODE_FOR_tbegin_retry,
9935 CODE_FOR_tbegin_retry_nofloat,
9945 s390_init_builtins (void)
9947 tree ftype, uint64_type;
9948 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
9950 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
9952 /* void foo (void) */
9953 ftype = build_function_type_list (void_type_node, NULL_TREE);
9954 add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
9955 BUILT_IN_MD, NULL, NULL_TREE);
9957 /* void foo (int) */
9958 ftype = build_function_type_list (void_type_node, integer_type_node,
9960 add_builtin_function ("__builtin_tabort", ftype,
9961 S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr);
9962 add_builtin_function ("__builtin_tx_assist", ftype,
9963 S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
9965 /* int foo (void *) */
9966 ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
9967 add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
9968 BUILT_IN_MD, NULL, returns_twice_attr);
9969 add_builtin_function ("__builtin_tbegin_nofloat", ftype,
9970 S390_BUILTIN_TBEGIN_NOFLOAT,
9971 BUILT_IN_MD, NULL, returns_twice_attr);
9973 /* int foo (void *, int) */
9974 ftype = build_function_type_list (integer_type_node, ptr_type_node,
9975 integer_type_node, NULL_TREE);
9976 add_builtin_function ("__builtin_tbegin_retry", ftype,
9977 S390_BUILTIN_TBEGIN_RETRY,
9979 NULL, returns_twice_attr);
9980 add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
9981 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9983 NULL, returns_twice_attr);
9985 /* int foo (void) */
9986 ftype = build_function_type_list (integer_type_node, NULL_TREE);
9987 add_builtin_function ("__builtin_tx_nesting_depth", ftype,
9988 S390_BUILTIN_TX_NESTING_DEPTH,
9989 BUILT_IN_MD, NULL, NULL_TREE);
9990 add_builtin_function ("__builtin_tend", ftype,
9991 S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
9993 /* void foo (uint64_t *, uint64_t) */
9995 uint64_type = long_unsigned_type_node;
9997 uint64_type = long_long_unsigned_type_node;
9999 ftype = build_function_type_list (void_type_node,
10000 build_pointer_type (uint64_type),
10001 uint64_type, NULL_TREE);
10002 add_builtin_function ("__builtin_non_tx_store", ftype,
10003 S390_BUILTIN_NON_TX_STORE,
10004 BUILT_IN_MD, NULL, NULL_TREE);
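/* Hypothetical user code for the builtins above (illustrative only;
   compile with -mhtm):

     #include <stdint.h>

     static int add_tx (uint64_t *p)
     {
       if (__builtin_tbegin ((void *) 0) == 0)  // CC 0: transaction started
         {
           ++*p;                                // transactional update
           __builtin_tend ();                   // commit
           return 0;
         }
       return -1;                               // aborted
     }
*/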
10007 /* Expand an expression EXP that calls a built-in function,
10008 with result going to TARGET if that's convenient
10009 (and in mode MODE if that's convenient).
10010 SUBTARGET may be used as the target for computing one of EXP's operands.
10011 IGNORE is nonzero if the value is to be ignored. */
10014 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10015 enum machine_mode mode ATTRIBUTE_UNUSED,
10016 int ignore ATTRIBUTE_UNUSED)
10020 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10021 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10022 enum insn_code icode;
10023 rtx op[MAX_ARGS], pat;
10027 call_expr_arg_iterator iter;
10029 if (fcode >= S390_BUILTIN_max)
10030 internal_error ("bad builtin fcode");
10031 icode = code_for_builtin[fcode];
10033 internal_error ("bad builtin fcode");
10036 error ("transactional execution builtins not enabled (-mhtm)");
10038 /* Set a flag in the machine specific cfun part in order to support
10039 saving/restoring of FPRs. */
10040 if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
10041 cfun->machine->tbegin_p = true;
10043 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10046 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10048 const struct insn_operand_data *insn_op;
10050 if (arg == error_mark_node)
10052 if (arity >= MAX_ARGS)
10055 insn_op = &insn_data[icode].operand[arity + nonvoid];
10057 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
10059 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
10061 if (insn_op->predicate == memory_operand)
10063 /* Don't move a NULL pointer into a register. Otherwise
10064 we have to rely on combine being able to move it back
10065 in order to get an immediate 0 in the instruction. */
10066 if (op[arity] != const0_rtx)
10067 op[arity] = copy_to_mode_reg (Pmode, op[arity]);
10068 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
10071 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
10079 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10081 || GET_MODE (target) != tmode
10082 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
10083 target = gen_reg_rtx (tmode);
10089 pat = GEN_FCN (icode) (target);
10093 pat = GEN_FCN (icode) (target, op[0]);
10095 pat = GEN_FCN (icode) (op[0]);
10099 pat = GEN_FCN (icode) (target, op[0], op[1]);
10101 pat = GEN_FCN (icode) (op[0], op[1]);
10104 gcc_unreachable ();
10116 /* We call mcount before the function prologue. So a profiled leaf
10117 function should stay a leaf function. */
10120 s390_keep_leaf_when_profiled ()
10125 /* Output assembly code for the trampoline template to stdio stream FILE.
10128 On S/390, we use gpr 1 internally in the trampoline code;
10129 gpr 0 is used to hold the static chain. */
10132 s390_asm_trampoline_template (FILE *file)
10135 op[0] = gen_rtx_REG (Pmode, 0);
10136 op[1] = gen_rtx_REG (Pmode, 1);
10140 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10141 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
10142 output_asm_insn ("br\t%1", op); /* 2 byte */
10143 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
10147 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10148 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
10149 output_asm_insn ("br\t%1", op); /* 2 byte */
10150 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
10154 /* Emit RTL insns to initialize the variable parts of a trampoline.
10155 FNADDR is an RTX for the address of the function's pure code.
10156 CXT is an RTX for the static chain value for the function. */
10159 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10161 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10164 emit_block_move (m_tramp, assemble_trampoline_template (),
10165 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
10167 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
10168 emit_move_insn (mem, cxt);
10169 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
10170 emit_move_insn (mem, fnaddr);
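/* Resulting 64-bit trampoline layout (a sketch, assuming
   UNITS_PER_LONG == 8):

     +0   basr %r1,0; lmg %r0,%r1,14(%r1); br %r1   -- template code
     +16  static chain value  -> loaded into %r0
     +24  target address      -> loaded into %r1

   The displacement 14 works out because after the 2-byte basr, %r1
   points 2 bytes into the trampoline: 2 + 14 = 16.  */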
10173 /* Output assembler code to FILE to increment profiler label # LABELNO
10174 for profiling a function entry. */
10177 s390_function_profiler (FILE *file, int labelno)
10182 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
10184 fprintf (file, "# function profiler \n");
10186 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
10187 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
10188 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
10190 op[2] = gen_rtx_REG (Pmode, 1);
10191 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
10192 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
10194 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
10197 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
10198 op[4] = gen_rtx_CONST (Pmode, op[4]);
10203 output_asm_insn ("stg\t%0,%1", op);
10204 output_asm_insn ("larl\t%2,%3", op);
10205 output_asm_insn ("brasl\t%0,%4", op);
10206 output_asm_insn ("lg\t%0,%1", op);
10208 else if (!flag_pic)
10210 op[6] = gen_label_rtx ();
10212 output_asm_insn ("st\t%0,%1", op);
10213 output_asm_insn ("bras\t%2,%l6", op);
10214 output_asm_insn (".long\t%4", op);
10215 output_asm_insn (".long\t%3", op);
10216 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10217 output_asm_insn ("l\t%0,0(%2)", op);
10218 output_asm_insn ("l\t%2,4(%2)", op);
10219 output_asm_insn ("basr\t%0,%0", op);
10220 output_asm_insn ("l\t%0,%1", op);
10224 op[5] = gen_label_rtx ();
10225 op[6] = gen_label_rtx ();
10227 output_asm_insn ("st\t%0,%1", op);
10228 output_asm_insn ("bras\t%2,%l6", op);
10229 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
10230 output_asm_insn (".long\t%4-%l5", op);
10231 output_asm_insn (".long\t%3-%l5", op);
10232 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10233 output_asm_insn ("lr\t%0,%2", op);
10234 output_asm_insn ("a\t%0,0(%2)", op);
10235 output_asm_insn ("a\t%2,4(%2)", op);
10236 output_asm_insn ("basr\t%0,%0", op);
10237 output_asm_insn ("l\t%0,%1", op);
10241 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
10242 into its SYMBOL_REF_FLAGS. */
10245 s390_encode_section_info (tree decl, rtx rtl, int first)
10247 default_encode_section_info (decl, rtl, first);
10249 if (TREE_CODE (decl) == VAR_DECL)
10251 /* If a variable has a forced alignment to < 2 bytes, mark it
10252 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL operand. */
10254 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
10255 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
10256 if (!DECL_SIZE (decl)
10257 || !DECL_ALIGN (decl)
10258 || !tree_fits_shwi_p (DECL_SIZE (decl))
10259 || (DECL_ALIGN (decl) <= 64
10260 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
10261 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10264 /* Literal pool references don't have a decl so they are handled
10265 differently here. We rely on the information in the MEM_ALIGN
10266 entry to decide upon natural alignment. */
10268 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
10269 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
10270 && (MEM_ALIGN (rtl) == 0
10271 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
10272 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
10273 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10276 /* Output thunk to FILE that implements a C++ virtual function call (with
10277 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
10278 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
10279 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
10280 relative to the resulting this pointer. */
10283 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10284 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10290 /* Make sure unwind info is emitted for the thunk if needed. */
10291 final_start_function (emit_barrier (), file, 1);
10293 /* Operand 0 is the target function. */
10294 op[0] = XEXP (DECL_RTL (function), 0);
10295 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
10298 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
10299 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
10300 op[0] = gen_rtx_CONST (Pmode, op[0]);
10303 /* Operand 1 is the 'this' pointer. */
10304 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10305 op[1] = gen_rtx_REG (Pmode, 3);
10307 op[1] = gen_rtx_REG (Pmode, 2);
10309 /* Operand 2 is the delta. */
10310 op[2] = GEN_INT (delta);
10312 /* Operand 3 is the vcall_offset. */
10313 op[3] = GEN_INT (vcall_offset);
10315 /* Operand 4 is the temporary register. */
10316 op[4] = gen_rtx_REG (Pmode, 1);
10318 /* Operands 5 to 8 can be used as labels. */
10324 /* Operand 9 can be used for temporary register. */
10327 /* Generate code. */
10330 /* Setup literal pool pointer if required. */
10331 if ((!DISP_IN_RANGE (delta)
10332 && !CONST_OK_FOR_K (delta)
10333 && !CONST_OK_FOR_Os (delta))
10334 || (!DISP_IN_RANGE (vcall_offset)
10335 && !CONST_OK_FOR_K (vcall_offset)
10336 && !CONST_OK_FOR_Os (vcall_offset)))
10338 op[5] = gen_label_rtx ();
10339 output_asm_insn ("larl\t%4,%5", op);
10342 /* Add DELTA to this pointer. */
10345 if (CONST_OK_FOR_J (delta))
10346 output_asm_insn ("la\t%1,%2(%1)", op);
10347 else if (DISP_IN_RANGE (delta))
10348 output_asm_insn ("lay\t%1,%2(%1)", op);
10349 else if (CONST_OK_FOR_K (delta))
10350 output_asm_insn ("aghi\t%1,%2", op);
10351 else if (CONST_OK_FOR_Os (delta))
10352 output_asm_insn ("agfi\t%1,%2", op);
10355 op[6] = gen_label_rtx ();
10356 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
10360 /* Perform vcall adjustment. */
10363 if (DISP_IN_RANGE (vcall_offset))
10365 output_asm_insn ("lg\t%4,0(%1)", op);
10366 output_asm_insn ("ag\t%1,%3(%4)", op);
10368 else if (CONST_OK_FOR_K (vcall_offset))
10370 output_asm_insn ("lghi\t%4,%3", op);
10371 output_asm_insn ("ag\t%4,0(%1)", op);
10372 output_asm_insn ("ag\t%1,0(%4)", op);
10374 else if (CONST_OK_FOR_Os (vcall_offset))
10376 output_asm_insn ("lgfi\t%4,%3", op);
10377 output_asm_insn ("ag\t%4,0(%1)", op);
10378 output_asm_insn ("ag\t%1,0(%4)", op);
10382 op[7] = gen_label_rtx ();
10383 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
10384 output_asm_insn ("ag\t%4,0(%1)", op);
10385 output_asm_insn ("ag\t%1,0(%4)", op);
10389 /* Jump to target. */
10390 output_asm_insn ("jg\t%0", op);
10392 /* Output literal pool if required. */
10395 output_asm_insn (".align\t4", op);
10396 targetm.asm_out.internal_label (file, "L",
10397 CODE_LABEL_NUMBER (op[5]));
10401 targetm.asm_out.internal_label (file, "L",
10402 CODE_LABEL_NUMBER (op[6]));
10403 output_asm_insn (".long\t%2", op);
10407 targetm.asm_out.internal_label (file, "L",
10408 CODE_LABEL_NUMBER (op[7]));
10409 output_asm_insn (".long\t%3", op);
10414 /* Setup base pointer if required. */
10416 || (!DISP_IN_RANGE (delta)
10417 && !CONST_OK_FOR_K (delta)
10418 && !CONST_OK_FOR_Os (delta))
10419 || (!DISP_IN_RANGE (delta)
10420 && !CONST_OK_FOR_K (vcall_offset)
10421 && !CONST_OK_FOR_Os (vcall_offset)))
10423 op[5] = gen_label_rtx ();
10424 output_asm_insn ("basr\t%4,0", op);
10425 targetm.asm_out.internal_label (file, "L",
10426 CODE_LABEL_NUMBER (op[5]));
10429 /* Add DELTA to this pointer. */
10432 if (CONST_OK_FOR_J (delta))
10433 output_asm_insn ("la\t%1,%2(%1)", op);
10434 else if (DISP_IN_RANGE (delta))
10435 output_asm_insn ("lay\t%1,%2(%1)", op);
10436 else if (CONST_OK_FOR_K (delta))
10437 output_asm_insn ("ahi\t%1,%2", op);
10438 else if (CONST_OK_FOR_Os (delta))
10439 output_asm_insn ("afi\t%1,%2", op);
10442 op[6] = gen_label_rtx ();
10443 output_asm_insn ("a\t%1,%6-%5(%4)", op);
10447 /* Perform vcall adjustment. */
10450 if (CONST_OK_FOR_J (vcall_offset))
10452 output_asm_insn ("l\t%4,0(%1)", op);
10453 output_asm_insn ("a\t%1,%3(%4)", op);
10455 else if (DISP_IN_RANGE (vcall_offset))
10457 output_asm_insn ("l\t%4,0(%1)", op);
10458 output_asm_insn ("ay\t%1,%3(%4)", op);
10460 else if (CONST_OK_FOR_K (vcall_offset))
10462 output_asm_insn ("lhi\t%4,%3", op);
10463 output_asm_insn ("a\t%4,0(%1)", op);
10464 output_asm_insn ("a\t%1,0(%4)", op);
10466 else if (CONST_OK_FOR_Os (vcall_offset))
10468 output_asm_insn ("iilf\t%4,%3", op);
10469 output_asm_insn ("a\t%4,0(%1)", op);
10470 output_asm_insn ("a\t%1,0(%4)", op);
10474 op[7] = gen_label_rtx ();
10475 output_asm_insn ("l\t%4,%7-%5(%4)", op);
10476 output_asm_insn ("a\t%4,0(%1)", op);
10477 output_asm_insn ("a\t%1,0(%4)", op);
10480 /* We had to clobber the base pointer register.
10481 Re-setup the base pointer (with a different base). */
10482 op[5] = gen_label_rtx ();
10483 output_asm_insn ("basr\t%4,0", op);
10484 targetm.asm_out.internal_label (file, "L",
10485 CODE_LABEL_NUMBER (op[5]));
10488 /* Jump to target. */
10489 op[8] = gen_label_rtx ();
10492 output_asm_insn ("l\t%4,%8-%5(%4)", op);
10493 else if (!nonlocal)
10494 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10495 /* We cannot call through .plt, since .plt requires %r12 loaded. */
10496 else if (flag_pic == 1)
10498 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10499 output_asm_insn ("l\t%4,%0(%4)", op);
10501 else if (flag_pic == 2)
10503 op[9] = gen_rtx_REG (Pmode, 0);
10504 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
10505 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10506 output_asm_insn ("ar\t%4,%9", op);
10507 output_asm_insn ("l\t%4,0(%4)", op);
10510 output_asm_insn ("br\t%4", op);
10512 /* Output literal pool. */
10513 output_asm_insn (".align\t4", op);
10515 if (nonlocal && flag_pic == 2)
10516 output_asm_insn (".long\t%0", op);
10519 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10520 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
10523 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
10525 output_asm_insn (".long\t%0", op);
10527 output_asm_insn (".long\t%0-%5", op);
10531 targetm.asm_out.internal_label (file, "L",
10532 CODE_LABEL_NUMBER (op[6]));
10533 output_asm_insn (".long\t%2", op);
10537 targetm.asm_out.internal_label (file, "L",
10538 CODE_LABEL_NUMBER (op[7]));
10539 output_asm_insn (".long\t%3", op);
10542 final_end_function ();
10546 s390_valid_pointer_mode (enum machine_mode mode)
10548 return (mode == SImode || (TARGET_64BIT && mode == DImode));
10551 /* Checks whether the given CALL_EXPR would use a call-saved
10552 register. This is used to decide whether sibling call
10553 optimization could be performed on the respective function call. */
10557 s390_call_saved_register_used (tree call_expr)
10559 CUMULATIVE_ARGS cum_v;
10560 cumulative_args_t cum;
10562 enum machine_mode mode;
10567 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
10568 cum = pack_cumulative_args (&cum_v);
10570 for (i = 0; i < call_expr_nargs (call_expr); i++)
10572 parameter = CALL_EXPR_ARG (call_expr, i);
10573 gcc_assert (parameter);
10575 /* For an undeclared variable passed as parameter we will get
10576 an ERROR_MARK node here. */
10577 if (TREE_CODE (parameter) == ERROR_MARK)
10580 type = TREE_TYPE (parameter);
10583 mode = TYPE_MODE (type);
10586 if (pass_by_reference (&cum_v, mode, type, true))
10589 type = build_pointer_type (type);
10592 parm_rtx = s390_function_arg (cum, mode, type, 0);
10594 s390_function_arg_advance (cum, mode, type, 0);
10599 if (REG_P (parm_rtx))
10602 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
10604 if (!call_used_regs[reg + REGNO (parm_rtx)])
10608 if (GET_CODE (parm_rtx) == PARALLEL)
10612 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
10614 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
10616 gcc_assert (REG_P (r));
10619 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
10621 if (!call_used_regs[reg + REGNO (r)])
10630 /* Return true if the given call expression can be
10631 turned into a sibling call.
10632 DECL holds the declaration of the function to be called whereas
10633 EXP is the call expression itself. */
10636 s390_function_ok_for_sibcall (tree decl, tree exp)
10638 /* The TPF epilogue uses register 1. */
10639 if (TARGET_TPF_PROFILING)
10642 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
10643 which would have to be restored before the sibcall. */
10644 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
10647 /* Register 6 on s390 is available as an argument register but is
10648 call-saved. This makes functions needing this register for arguments
10649 not suitable for sibcalls. */
10650 return !s390_call_saved_register_used (exp);
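/* Example: a call f (a, b, c, d, e) with five word-sized integer
   arguments places e in %r6; since %r6 must survive the call, such
   a call is not turned into a sibcall.  */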
10653 /* Return the fixed registers used for condition codes. */
10656 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10659 *p2 = INVALID_REGNUM;
10664 /* This function is used by the call expanders of the machine description.
10665 It emits the call insn itself together with the necessary operations
10666 to adjust the target address and returns the emitted insn.
10667 ADDR_LOCATION is the target address rtx
10668 TLS_CALL the location of the thread-local symbol
10669 RESULT_REG the register where the result of the call should be stored
10670 RETADDR_REG the register where the return address should be stored
10671 If this parameter is NULL_RTX the call is considered
10672 to be a sibling call. */
10675 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
10678 bool plt_call = false;
10684 /* Direct function calls need special treatment. */
10685 if (GET_CODE (addr_location) == SYMBOL_REF)
10687 /* When calling a global routine in PIC mode, we must
10688 replace the symbol itself with the PLT stub. */
10689 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
10691 if (retaddr_reg != NULL_RTX)
10693 addr_location = gen_rtx_UNSPEC (Pmode,
10694 gen_rtvec (1, addr_location),
10696 addr_location = gen_rtx_CONST (Pmode, addr_location);
10700 /* For -fpic code the PLT entries might use r12 which is
10701 call-saved. Therefore we cannot do a sibcall when
10702 calling directly using a symbol ref. When reaching
10703 this point we decided (in s390_function_ok_for_sibcall)
10704 to do a sibcall for a function pointer but one of the
10705 optimizers was able to get rid of the function pointer
10706 by propagating the symbol ref into the call. This
10707 optimization is illegal for S/390 so we turn the direct
10708 call into an indirect call again. */
10709 addr_location = force_reg (Pmode, addr_location);
10712 /* Unless we can use the bras(l) insn, force the
10713 routine address into a register. */
10714 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
10717 addr_location = legitimize_pic_address (addr_location, 0);
10719 addr_location = force_reg (Pmode, addr_location);
10723 /* If it is already an indirect call or the code above moved the
10724 SYMBOL_REF to somewhere else make sure the address can be found in
10726 if (retaddr_reg == NULL_RTX
10727 && GET_CODE (addr_location) != SYMBOL_REF
10730 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
10731 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
10734 addr_location = gen_rtx_MEM (QImode, addr_location);
10735 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
10737 if (result_reg != NULL_RTX)
10738 call = gen_rtx_SET (VOIDmode, result_reg, call);
10740 if (retaddr_reg != NULL_RTX)
10742 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
10744 if (tls_call != NULL_RTX)
10745 vec = gen_rtvec (3, call, clobber,
10746 gen_rtx_USE (VOIDmode, tls_call));
10748 vec = gen_rtvec (2, call, clobber);
10750 call = gen_rtx_PARALLEL (VOIDmode, vec);
10753 insn = emit_call_insn (call);
10755 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
10756 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
10758 /* s390_function_ok_for_sibcall should
10759 have denied sibcalls in this case. */
10760 gcc_assert (retaddr_reg != NULL_RTX);
10761 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
10766 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
10769 s390_conditional_register_usage (void)
10775 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10776 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10778 if (TARGET_CPU_ZARCH)
10780 fixed_regs[BASE_REGNUM] = 0;
10781 call_used_regs[BASE_REGNUM] = 0;
10782 fixed_regs[RETURN_REGNUM] = 0;
10783 call_used_regs[RETURN_REGNUM] = 0;
10787 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10788 call_used_regs[i] = call_really_used_regs[i] = 0;
10792 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
10793 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
10796 if (TARGET_SOFT_FLOAT)
10798 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10799 call_used_regs[i] = fixed_regs[i] = 1;
10803 /* Function corresponding to the eh_return expander. */
10805 static GTY(()) rtx s390_tpf_eh_return_symbol;
10807 s390_emit_tpf_eh_return (rtx target)
10812 if (!s390_tpf_eh_return_symbol)
10813 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10815 reg = gen_rtx_REG (Pmode, 2);
10816 orig_ra = gen_rtx_REG (Pmode, 3);
10818 emit_move_insn (reg, target);
10819 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
10820 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10821 gen_rtx_REG (Pmode, RETURN_REGNUM));
10822 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10823 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
10825 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10828 /* Rework the prologue/epilogue to avoid saving/restoring
10829 registers unnecessarily. */
10832 s390_optimize_prologue (void)
10834 rtx_insn *insn, *new_insn, *next_insn;
10836 /* Do a final recompute of the frame-related data. */
10837 s390_optimize_register_info ();
10839 /* If all special registers are in fact used, there's nothing we
10840 can do, so no point in walking the insn list. */
10842 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10843 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10844 && (TARGET_CPU_ZARCH
10845 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10846 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10849 /* Search for prologue/epilogue insns and replace them. */
10851 for (insn = get_insns (); insn; insn = next_insn)
10853 int first, last, off;
10854 rtx set, base, offset;
10857 next_insn = NEXT_INSN (insn);
10859 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10862 pat = PATTERN (insn);
10864 /* Remove ldgr/lgdr instructions used for saving and restoring
10865 GPRs if possible. */
10867 && GET_CODE (pat) == SET
10868 && GET_MODE (SET_SRC (pat)) == DImode
10869 && REG_P (SET_SRC (pat))
10870 && REG_P (SET_DEST (pat)))
10872 int src_regno = REGNO (SET_SRC (pat));
10873 int dest_regno = REGNO (SET_DEST (pat));
10877 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
10878 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
10881 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
10882 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
10884 /* GPR must be call-saved, FPR must be call-clobbered. */
10885 if (!call_really_used_regs[fpr_regno]
10886 || call_really_used_regs[gpr_regno])
10889 /* It must not happen that what we once saved in an FPR now
10890 needs a stack slot. */
10891 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
10893 if (cfun_gpr_save_slot (gpr_regno) == 0)
10895 remove_insn (insn);
10900 if (GET_CODE (pat) == PARALLEL
10901 && store_multiple_operation (pat, VOIDmode))
10903 set = XVECEXP (pat, 0, 0);
10904 first = REGNO (SET_SRC (set));
10905 last = first + XVECLEN (pat, 0) - 1;
10906 offset = const0_rtx;
10907 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10908 off = INTVAL (offset);
10910 if (GET_CODE (base) != REG || off < 0)
10912 if (cfun_frame_layout.first_save_gpr != -1
10913 && (cfun_frame_layout.first_save_gpr < first
10914 || cfun_frame_layout.last_save_gpr > last))
10916 if (REGNO (base) != STACK_POINTER_REGNUM
10917 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10919 if (first > BASE_REGNUM || last < BASE_REGNUM)
10922 if (cfun_frame_layout.first_save_gpr != -1)
10924 rtx s_pat = save_gprs (base,
10925 off + (cfun_frame_layout.first_save_gpr
10926 - first) * UNITS_PER_LONG,
10927 cfun_frame_layout.first_save_gpr,
10928 cfun_frame_layout.last_save_gpr);
10929 new_insn = emit_insn_before (s_pat, insn);
10930 INSN_ADDRESSES_NEW (new_insn, -1);
10933 remove_insn (insn);
10937 if (cfun_frame_layout.first_save_gpr == -1
10938 && GET_CODE (pat) == SET
10939 && GENERAL_REG_P (SET_SRC (pat))
10940 && GET_CODE (SET_DEST (pat)) == MEM)
10943 first = REGNO (SET_SRC (set));
10944 offset = const0_rtx;
10945 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10946 off = INTVAL (offset);
10948 if (GET_CODE (base) != REG || off < 0)
10950 if (REGNO (base) != STACK_POINTER_REGNUM
10951 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10954 remove_insn (insn);
10958 if (GET_CODE (pat) == PARALLEL
10959 && load_multiple_operation (pat, VOIDmode))
10961 set = XVECEXP (pat, 0, 0);
10962 first = REGNO (SET_DEST (set));
10963 last = first + XVECLEN (pat, 0) - 1;
10964 offset = const0_rtx;
10965 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10966 off = INTVAL (offset);
10968 if (GET_CODE (base) != REG || off < 0)
10971 if (cfun_frame_layout.first_restore_gpr != -1
10972 && (cfun_frame_layout.first_restore_gpr < first
10973 || cfun_frame_layout.last_restore_gpr > last))
10975 if (REGNO (base) != STACK_POINTER_REGNUM
10976 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10978 if (first > BASE_REGNUM || last < BASE_REGNUM)
10981 if (cfun_frame_layout.first_restore_gpr != -1)
10983 rtx rpat = restore_gprs (base,
10984 off + (cfun_frame_layout.first_restore_gpr
10985 - first) * UNITS_PER_LONG,
10986 cfun_frame_layout.first_restore_gpr,
10987 cfun_frame_layout.last_restore_gpr);
10989 /* Remove REG_CFA_RESTOREs for registers that we no
10990 longer need to save. */
10991 REG_NOTES (rpat) = REG_NOTES (insn);
10992 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
10993 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
10994 && ((int) REGNO (XEXP (*ptr, 0))
10995 < cfun_frame_layout.first_restore_gpr))
10996 *ptr = XEXP (*ptr, 1);
10998 ptr = &XEXP (*ptr, 1);
10999 new_insn = emit_insn_before (rpat, insn);
11000 RTX_FRAME_RELATED_P (new_insn) = 1;
11001 INSN_ADDRESSES_NEW (new_insn, -1);
11004 remove_insn (insn);
11008 if (cfun_frame_layout.first_restore_gpr == -1
11009 && GET_CODE (pat) == SET
11010 && GENERAL_REG_P (SET_DEST (pat))
11011 && GET_CODE (SET_SRC (pat)) == MEM)
11014 first = REGNO (SET_DEST (set));
11015 offset = const0_rtx;
11016 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
11017 off = INTVAL (offset);
11019 if (GET_CODE (base) != REG || off < 0)
11022 if (REGNO (base) != STACK_POINTER_REGNUM
11023 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11026 remove_insn (insn);
11032 /* On z10 and later the dynamic branch prediction must see the
11033 backward jump within a certain window. If not it falls back to
11034 the static prediction. This function rearranges the loop backward
11035 branch in a way which makes the static prediction always correct.
11036 The function returns true if it added an instruction. */
11038 s390_fix_long_loop_prediction (rtx_insn *insn)
11040 rtx set = single_set (insn);
11041 rtx code_label, label_ref, new_label;
11043 rtx_insn *cur_insn;
11047 /* This will exclude branch on count and branch on index patterns
11048 since these are correctly statically predicted. */
11050 || SET_DEST (set) != pc_rtx
11051 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
11054 /* Skip conditional returns. */
11055 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
11056 && XEXP (SET_SRC (set), 2) == pc_rtx)
11059 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
11060 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
11062 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
11064 code_label = XEXP (label_ref, 0);
11066 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
11067 || INSN_ADDRESSES (INSN_UID (insn)) == -1
11068 || (INSN_ADDRESSES (INSN_UID (insn))
11069 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
11072 for (distance = 0, cur_insn = PREV_INSN (insn);
11073 distance < PREDICT_DISTANCE - 6;
11074 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
11075 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
11078 new_label = gen_label_rtx ();
11079 uncond_jump = emit_jump_insn_after (
11080 gen_rtx_SET (VOIDmode, pc_rtx,
11081 gen_rtx_LABEL_REF (VOIDmode, code_label)),
11083 emit_label_after (new_label, uncond_jump);
11085 tmp = XEXP (SET_SRC (set), 1);
11086 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
11087 XEXP (SET_SRC (set), 2) = tmp;
11088 INSN_CODE (insn) = -1;
11090 XEXP (label_ref, 0) = new_label;
11091 JUMP_LABEL (insn) = new_label;
11092 JUMP_LABEL (uncond_jump) = code_label;
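/* Shape of the rewrite performed above (a sketch; labels are
   illustrative):

     before:                     after:
       loop: ...                   loop: ...
             jcond loop                  jcond' new_label   -- short forward
                                         j     loop         -- unconditional
                                   new_label:

   The conditional branch (with swapped arms) becomes a short forward
   branch, and the far backward jump becomes unconditional, which
   needs no prediction.  */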
11097 /* Returns 1 if INSN reads the value of REG for purposes not related
11098 to addressing of memory, and 0 otherwise. */
11100 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
11102 return reg_referenced_p (reg, PATTERN (insn))
11103 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
11106 /* Starting from INSN find_cond_jump looks downwards in the insn
11107 stream for a single jump insn which is the last user of the
11108 condition code set in INSN. */
11110 find_cond_jump (rtx_insn *insn)
11112 for (; insn; insn = NEXT_INSN (insn))
11116 if (LABEL_P (insn))
11119 if (!JUMP_P (insn))
11121 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
11126 /* This will be triggered by a return. */
11127 if (GET_CODE (PATTERN (insn)) != SET)
11130 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
11131 ite = SET_SRC (PATTERN (insn));
11133 if (GET_CODE (ite) != IF_THEN_ELSE)
11136 cc = XEXP (XEXP (ite, 0), 0);
11137 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
11140 if (find_reg_note (insn, REG_DEAD, cc))
11148 /* Swap the condition in COND and the operands in OP0 and OP1 so that
11149 the semantics does not change. If NULL_RTX is passed as COND the
11150 function tries to find the conditional jump starting with INSN. */
11152 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
11156 if (cond == NULL_RTX)
11158 rtx jump = find_cond_jump (NEXT_INSN (insn));
11159 jump = jump ? single_set (jump) : NULL_RTX;
11161 if (jump == NULL_RTX)
11164 cond = XEXP (XEXP (jump, 1), 0);
11169 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
11172 /* On z10, instructions of the compare-and-branch family have the
11173 property to access the register occurring as second operand with
11174 its bits complemented. If such a compare is grouped with a second
11175 instruction that accesses the same register non-complemented, and
11176 if that register's value is delivered via a bypass, then the
11177 pipeline recycles, thereby causing significant performance decline.
11178 This function locates such situations and exchanges the two
11179 operands of the compare. The function returns true whenever it
11182 s390_z10_optimize_cmp (rtx_insn *insn)
11184 rtx_insn *prev_insn, *next_insn;
11185 bool insn_added_p = false;
11186 rtx cond, *op0, *op1;
11188 if (GET_CODE (PATTERN (insn)) == PARALLEL)
11190 /* Handle compare and branch and branch on count
11192 rtx pattern = single_set (insn);
11195 || SET_DEST (pattern) != pc_rtx
11196 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
11199 cond = XEXP (SET_SRC (pattern), 0);
11200 op0 = &XEXP (cond, 0);
11201 op1 = &XEXP (cond, 1);
11203 else if (GET_CODE (PATTERN (insn)) == SET)
11207 /* Handle normal compare instructions. */
11208 src = SET_SRC (PATTERN (insn));
11209 dest = SET_DEST (PATTERN (insn));
11212 || !CC_REGNO_P (REGNO (dest))
11213 || GET_CODE (src) != COMPARE)
11216 /* s390_swap_cmp will try to find the conditional
11217 jump when passing NULL_RTX as condition. */
11219 op0 = &XEXP (src, 0);
11220 op1 = &XEXP (src, 1);
11225 if (!REG_P (*op0) || !REG_P (*op1))
11228 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
11231 /* Swap the COMPARE arguments and its mask if there is a
11232 conflicting access in the previous insn. */
11233 prev_insn = prev_active_insn (insn);
11234 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11235 && reg_referenced_p (*op1, PATTERN (prev_insn)))
11236 s390_swap_cmp (cond, op0, op1, insn);
11238 /* Check if there is a conflict with the next insn. If there
11239 was no conflict with the previous insn, then swap the
11240 COMPARE arguments and its mask. If we already swapped
11241 the operands, or if swapping them would cause a conflict
11242 with the previous insn, issue a NOP after the COMPARE in
11243 order to separate the two instructions. */
11244 next_insn = next_active_insn (insn);
11245 if (next_insn != NULL_RTX && INSN_P (next_insn)
11246 && s390_non_addr_reg_read_p (*op1, next_insn))
11248 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11249 && s390_non_addr_reg_read_p (*op0, prev_insn))
11251 if (REGNO (*op1) == 0)
11252 emit_insn_after (gen_nop1 (), insn);
11254 emit_insn_after (gen_nop (), insn);
11255 insn_added_p = true;
11258 s390_swap_cmp (cond, op0, op1, insn);
11260 return insn_added_p;
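/* Sketch of the situation handled above:

     cgrj  %r1,%r2,...    -- reads %r2 with complemented bits
     agr   %r3,%r2        -- adjacent plain read of %r2

   The operands are swapped to cgrj %r2,%r1 with the swapped
   condition, or a nop is inserted between the two insns when
   swapping would conflict with the previous insn.  */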
11263 /* Perform machine-dependent processing. */
11268 bool pool_overflow = false;
11270 /* Make sure all splits have been performed; splits after
11271 machine_dependent_reorg might confuse insn length counts. */
11272 split_all_insns_noflow ();
11274 /* Install the main literal pool and the associated base
11275 register load insns.
11277 In addition, there are two problematic situations we need to handle:
11280 - the literal pool might be > 4096 bytes in size, so that
11281 some of its elements cannot be directly accessed
11283 - a branch target might be > 64K away from the branch, so that
11284 it is not possible to use a PC-relative instruction.
11286 To fix those, we split the single literal pool into multiple
11287 pool chunks, reloading the pool base register at various
11288 points throughout the function to ensure it always points to
11289 the pool chunk the following code expects, and / or replace
11290 PC-relative branches by absolute branches.
11292 However, the two problems are interdependent: splitting the
11293 literal pool can move a branch further away from its target,
11294 causing the 64K limit to overflow, and on the other hand,
11295 replacing a PC-relative branch by an absolute branch means
11296 we need to put the branch target address into the literal
11297 pool, possibly causing it to overflow.
11299 So, we loop trying to fix up both problems until we manage
11300 to satisfy both conditions at the same time. Note that the
11301 loop is guaranteed to terminate as every pass of the loop
11302 strictly decreases the total number of PC-relative branches
11303 in the function. (This is not completely true as there
11304 might be branch-over-pool insns introduced by chunkify_start.
11305 Those, however, never need to be split.) */
11309 struct constant_pool *pool = NULL;
11311 /* Collect the literal pool. */
11312 if (!pool_overflow)
11314 pool = s390_mainpool_start ();
11316 pool_overflow = true;
11319 /* If literal pool overflowed, start to chunkify it. */
11321 pool = s390_chunkify_start ();
11323 /* Split out-of-range branches. If this has created new
11324 literal pool entries, cancel current chunk list and
11325 recompute it. zSeries machines have large branch
11326 instructions, so we never need to split a branch. */
11327 if (!TARGET_CPU_ZARCH && s390_split_branches ())
11330 s390_chunkify_cancel (pool);
11332 s390_mainpool_cancel (pool);
11337 /* If we made it up to here, both conditions are satisfied.
11338 Finish up literal pool related changes. */
11340 s390_chunkify_finish (pool);
11342 s390_mainpool_finish (pool);
11344 /* We're done splitting branches. */
11345 cfun->machine->split_branches_pending_p = false;
11349 /* Generate out-of-pool execute target insns. */
11350 if (TARGET_CPU_ZARCH)
11352 rtx_insn *insn, *target;
11355 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11357 label = s390_execute_label (insn);
11361 gcc_assert (label != const0_rtx);
11363 target = emit_label (XEXP (label, 0));
11364 INSN_ADDRESSES_NEW (target, -1);
11366 target = emit_insn (s390_execute_target (insn));
11367 INSN_ADDRESSES_NEW (target, -1);
11371 /* Try to optimize prologue and epilogue further. */
11372 s390_optimize_prologue ();
11374 /* Walk over the insns and do some >=z10 specific changes. */
11375 if (s390_tune == PROCESSOR_2097_Z10
11376 || s390_tune == PROCESSOR_2817_Z196
11377 || s390_tune == PROCESSOR_2827_ZEC12)
11380 bool insn_added_p = false;
11382 /* The insn lengths and addresses have to be up to date for the
11383 following manipulations. */
11384 shorten_branches (get_insns ());
11386 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11388 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
11392 insn_added_p |= s390_fix_long_loop_prediction (insn);
11394 if ((GET_CODE (PATTERN (insn)) == PARALLEL
11395 || GET_CODE (PATTERN (insn)) == SET)
11396 && s390_tune == PROCESSOR_2097_Z10)
11397 insn_added_p |= s390_z10_optimize_cmp (insn);
11400 /* Adjust branches if we added new instructions. */
11402 shorten_branches (get_insns ());
11406 /* Return true if INSN is an fp load insn writing register REGNO. */
11408 s390_fpload_toreg (rtx insn, unsigned int regno)
11411 enum attr_type flag = s390_safe_attr_type (insn);
11413 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
11416 set = single_set (insn);
11418 if (set == NULL_RTX)
11421 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
11424 if (REGNO (SET_DEST (set)) != regno)
11430 /* This value describes the distance to be avoided between an
11431 arithmetic fp instruction and an fp load writing the same register.
11432 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
11433 fine, but the exact value has to be avoided. Otherwise the FP
11434 pipeline will throw an exception causing a major penalty. */
11435 #define Z10_EARLYLOAD_DISTANCE 7
11437 /* Rearrange the ready list in order to avoid the situation described
11438 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
11439 moved to the very end of the ready list. */
11441 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
11443 unsigned int regno;
11444 int nready = *nready_p;
11449 enum attr_type flag;
11452 /* Skip DISTANCE - 1 active insns. */
11453 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
11454 distance > 0 && insn != NULL_RTX;
11455 distance--, insn = prev_active_insn (insn))
11456 if (CALL_P (insn) || JUMP_P (insn))
11459 if (insn == NULL_RTX)
11462 set = single_set (insn);
11464 if (set == NULL_RTX || !REG_P (SET_DEST (set))
11465 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
11468 flag = s390_safe_attr_type (insn);
11470 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
11473 regno = REGNO (SET_DEST (set));
11476 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
11483 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
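/* Note: the insn at the top of the ready list (highest index) is
   issued next, so moving the conflicting FP load down to ready[0]
   delays it as long as possible.  */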
11488 /* The s390_sched_state variable tracks the state of the current or
11489 the last instruction group.
11491 0,1,2 number of instructions scheduled in the current group
11492 3 the last group is complete - normal insns
11493 4 the last group was a cracked/expanded insn */
11495 static int s390_sched_state;
11497 #define S390_OOO_SCHED_STATE_NORMAL 3
11498 #define S390_OOO_SCHED_STATE_CRACKED 4
11500 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
11501 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
11502 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
11503 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
11505 static unsigned int
11506 s390_get_sched_attrmask (rtx insn)
11508 unsigned int mask = 0;
11510 if (get_attr_ooo_cracked (insn))
11511 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
11512 if (get_attr_ooo_expanded (insn))
11513 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
11514 if (get_attr_ooo_endgroup (insn))
11515 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
11516 if (get_attr_ooo_groupalone (insn))
11517 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state s390_sched_state.  */
static int
s390_sched_score (rtx insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (s390_sched_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
         break a group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      /* fall through */
    case 1:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
        score += 5;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_NORMAL:
      /* Prefer not cracked insns if the last was not cracked.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_CRACKED:
      /* Try to keep cracked insns together to prevent them from
         interrupting groups.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      break;
    }
  return score;
}
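
/* Illustrative example (added for clarity; assumes the score
   increments above): in state 2, i.e. with two insns already in the
   current group, an endgroup insn scores 20 (10 for being neither
   cracked, expanded nor groupalone, plus 10 for ending the group)
   while a plain insn scores only 10, so the endgroup insn is
   preferred for the last slot.  */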
/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
static int
s390_sched_reorder (FILE *file, int verbose,
                    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10)
    if (reload_completed && *nreadyp > 1)
      s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
         end) of the list.  A full sort is not needed since a conflict
         in the hazard recognition cannot happen.  So the top insn in
         the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
        {
          int score;

          if (recog_memoized (ready[i]) < 0)
            continue;

          score = s390_sched_score (ready[i]);
          if (score > max_score)
            {
              max_score = score;
              max_index = i;
            }
        }

      if (max_index != -1)
        {
          if (max_index != last_index)
            {
              tmp = ready[max_index];
              ready[max_index] = ready[last_index];
              ready[last_index] = tmp;

              if (verbose > 5)
                fprintf (file,
                         "move insn %d to the top of list\n",
                         INSN_UID (ready[last_index]));
            }
          else if (verbose > 5)
            fprintf (file,
                     "best insn %d already on top\n",
                     INSN_UID (ready[last_index]));
        }

      if (verbose > 5)
        {
          fprintf (file, "ready list ooo attributes - sched state: %d\n",
                   s390_sched_state);

          for (i = last_index; i >= 0; i--)
            {
              if (recog_memoized (ready[i]) < 0)
                continue;
              fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
                       s390_sched_score (ready[i]));
#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
              PRINT_OOO_ATTR (ooo_cracked);
              PRINT_OOO_ATTR (ooo_expanded);
              PRINT_OOO_ATTR (ooo_endgroup);
              PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
              fprintf (file, "\n");
            }
        }
    }

  return s390_issue_rate ();
}
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
      else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
               || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
      else
        {
          /* Only normal insns are left (mask == 0).  */
          switch (s390_sched_state)
            {
            case 0:
            case 1:
            case 2:
            case S390_OOO_SCHED_STATE_NORMAL:
              if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
                s390_sched_state = 1;
              else
                s390_sched_state++;
              break;
            case S390_OOO_SCHED_STATE_CRACKED:
              s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
              break;
            }
        }

      if (verbose > 5)
        {
          fprintf (file, "insn %d: ", INSN_UID (insn));
#define PRINT_OOO_ATTR(ATTR) \
          fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
          PRINT_OOO_ATTR (ooo_cracked);
          PRINT_OOO_ATTR (ooo_expanded);
          PRINT_OOO_ATTR (ooo_endgroup);
          PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
          fprintf (file, "\n");
          fprintf (file, "sched state: %d\n", s390_sched_state);
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  s390_sched_state = 0;
}
/* This function checks the whole of insn X for memory references.  The
   function always returns zero because the framework it is called
   from would stop recursively analyzing the insn upon a return value
   other than zero.  The real result of this function is updating
   counter variable MEM_COUNT.  */
static int
check_dpu (rtx *x, unsigned *mem_count)
{
  if (*x != NULL_RTX && MEM_P (*x))
    (*mem_count)++;
  return 0;
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
   a new number by which struct loop *loop should be unrolled if tuned
   for cpus with a built-in stride prefetcher.
   The loop is analyzed for memory accesses by calling check_dpu for
   each rtx of the loop.  Depending on the loop_depth and the amount of
   memory accesses a new number <= nunroll is returned to improve the
   behaviour of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune != PROCESSOR_2097_Z10
      && s390_tune != PROCESSOR_2817_Z196
      && s390_tune != PROCESSOR_2827_ZEC12)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
        if (INSN_P (insn) && INSN_CODE (insn) != -1)
          for_each_rtx_in_insn (&insn, (rtx_function) check_dpu, &mem_count);
    }
  free (bbs);

  /* Prevent division by zero, and we do not need to adjust nunroll in this case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
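
/* Illustrative example (added for clarity; assumes the depth-to-budget
   mapping above): a depth-1 loop containing four memory accesses is
   limited to MIN (nunroll, 28 / 4), i.e. at most 7 unrolled copies,
   so the stride prefetcher can still track the resulting access
   streams.  */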
static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mhotpatch:
            s390_hotpatch_trampoline_halfwords = (opt->value) ?
              s390_hotpatch_trampoline_halfwords_default : -1;
            break;
          case OPT_mhotpatch_:
            {
              int val;

              val = integral_argument (opt->arg);
              if (val == -1)
                {
                  /* Argument is not a plain number.  */
                  error ("argument to %qs should be a non-negative integer",
                         "-mhotpatch=");
                  break;
                }
              else if (val > s390_hotpatch_trampoline_halfwords_max)
                {
                  error ("argument to %qs is too large (max. %d)",
                         "-mhotpatch=", s390_hotpatch_trampoline_halfwords_max);
                  break;
                }
              s390_hotpatch_trampoline_halfwords = val;
              break;
            }
          default:
            gcc_unreachable ();
          }
      }
  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  /* Architecture mode defaults according to ABI.  */
  if (!(target_flags_explicit & MASK_ZARCH))
    {
      if (TARGET_64BIT)
        target_flags |= MASK_ZARCH;
      else
        target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on
     the command line.  */
  if (s390_arch == PROCESSOR_max)
    {
      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
      s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
      s390_arch_flags = processor_flags_table[(int) s390_arch];
    }

  /* Determine processor to tune for.  */
  if (s390_tune == PROCESSOR_max)
    {
      s390_tune = s390_arch;
      s390_tune_flags = s390_arch_flags;
    }

  /* Sanity checks.  */
  if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
    error ("z/Architecture mode not supported on %s", s390_arch_string);
  if (TARGET_64BIT && !TARGET_ZARCH)
    error ("64-bit ABI not supported in ESA/390 mode");

  /* Use hardware DFP if available and not explicitly disabled by
     the user, e.g. with -m31 -march=z10 -mzarch.  */
  if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
    target_flags |= MASK_HARD_DFP;

  /* Enable hardware transactions if available and not explicitly
     disabled by the user, e.g. with -m31 -march=zEC12 -mzarch.  */
  if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
    target_flags |= MASK_OPT_HTM;

  if (TARGET_HARD_DFP && !TARGET_DFP)
    {
      if (target_flags_explicit & MASK_HARD_DFP)
        {
          if (!TARGET_CPU_DFP)
            error ("hardware decimal floating point instructions"
                   " not available on %s", s390_arch_string);
          if (!TARGET_ZARCH)
            error ("hardware decimal floating point instructions"
                   " not available in ESA/390 mode");
        }
      else
        target_flags &= ~MASK_HARD_DFP;
    }

  if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
    {
      if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
        error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");

      target_flags &= ~MASK_HARD_DFP;
    }
  /* Set processor cost function.  */
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
      s390_cost = &z990_cost;
      break;
    case PROCESSOR_2094_Z9_109:
      s390_cost = &z9_109_cost;
      break;
    case PROCESSOR_2097_Z10:
      s390_cost = &z10_cost;
      break;
    case PROCESSOR_2817_Z196:
      s390_cost = &z196_cost;
      break;
    case PROCESSOR_2827_ZEC12:
      s390_cost = &zEC12_cost;
      break;
    default:
      s390_cost = &z900_cost;
    }

  if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
           "in combination");

  if (s390_stack_size)
    {
      if (s390_stack_guard >= s390_stack_size)
        error ("stack size must be greater than the stack guard value");
      else if (s390_stack_size > 1 << 16)
        error ("stack size must not be greater than 64k");
    }
  else if (s390_stack_guard)
    error ("-mstack-guard implies use of -mstack-size");
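
  /* Illustrative example (added for clarity): -mstack-size=65536
     -mstack-guard=4096 passes both checks above, whereas
     -mstack-guard=4096 on its own is rejected because it only makes
     sense together with -mstack-size.  */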
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (s390_tune == PROCESSOR_2097_Z10
      || s390_tune == PROCESSOR_2817_Z196
      || s390_tune == PROCESSOR_2827_ZEC12)
    {
      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
    }

  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* Values for loop prefetching.  */
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a small
     part of the caches above l1.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
         debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
        dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
        dwarf_version = 2;
    }

  /* Register a target-specific optimization-and-lowering pass
     to run immediately before prologue and epilogue generation.

     Registering the pass must be done at start up.  It's
     convenient to do it here.  */
  opt_pass *new_pass = new pass_s390_early_mach (g);
  struct register_pass_info insert_pass_s390_early_mach =
    {
      new_pass,                 /* pass */
      "pro_and_epilogue",       /* reference_pass_name */
      1,                        /* ref_pass_instance_number */
      PASS_POS_INSERT_BEFORE    /* po_op */
    };
  register_pass (&insert_pass_s390_early_mach);
}
/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
#ifdef ASM_OUTPUT_DWARF_DTPREL
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"