/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2014 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "coretypes.h"
#include "print-tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-pass.h"
/* Define the specific costs for a given cpu. */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction. */
  const int mghi;     /* cost of an MGHI instruction. */
  const int mh;       /* cost of an MH instruction. */
  const int mhi;      /* cost of an MHI instruction. */
  const int ml;       /* cost of an ML instruction. */
  const int mr;       /* cost of an MR instruction. */
  const int ms;       /* cost of an MS instruction. */
  const int msg;      /* cost of an MSG instruction. */
  const int msgf;     /* cost of an MSGF instruction. */
  const int msgfr;    /* cost of an MSGFR instruction. */
  const int msgr;     /* cost of an MSGR instruction. */
  const int msr;      /* cost of an MSR instruction. */
  const int mult_df;  /* cost of multiplication in DFmode. */
  const int mxbr;     /* cost of an MXBR instruction. */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode. */
  const int sqdbr;    /* cost of square root in DFmode. */
  const int sqebr;    /* cost of square root in SFmode. */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode. */
  const int maebr;    /* cost of multiply and add in SFmode. */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};
const struct processor_costs *s390_cost;

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};
static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};
static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};
static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};
static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

extern int reload_completed;
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
static rtx_insn *last_scheduled_insn;
/* Structure used to hold the components of an S/390 memory
   address. A legitimate address on S/390 is of the general
   form
     base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant. */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
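
/* Illustration (editor's note, not part of the original sources): in an
   RX-format instruction such as

     l %r1,8(%r2,%r3)

   the memory operand consists of displacement 8, index %r2 and base %r3;
   any of the three components may be absent. */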
/* The following structure is embedded in the machine
   specific part of struct function. */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame. */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Number of the first and last gpr for which slots in the register
     save area are reserved. */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
      0 - does not need to be saved at all
     -1 - stack slot */
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored. */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers. Set, if the
     respective register has to be saved. Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved. */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack. */
  bool save_return_addr_p;

  /* Size of stack frame. */
  HOST_WIDE_INT frame_size;
};
/* Define the structure for the machine field in struct function. */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register. */
  rtx base_reg;

  /* True if we may need to perform branch splitting. */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     the FPRs. */
  bool tbegin_p;
};
/* A few accessor macros for struct cfun->machine->s390_frame_layout. */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
  ? cfun_frame_layout.fpr_bitmap & 0x0f \
  : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
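
/* Usage sketch (editor's note, not part of the original sources):

     cfun_set_fpr_save (FPR0_REGNUM);     sets bit 0 of fpr_bitmap
     cfun_fpr_save_p (FPR0_REGNUM)        now evaluates to true

   consistent with the fpr_bitmap layout documented in struct
   s390_frame_layout above, where f0 occupies the rightmost bit. */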
/* Number of GPRs and FPRs used for argument passing. */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)

/* A couple of shortcuts. */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* The read-ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU. */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
static const int s390_hotpatch_trampoline_halfwords_default = 12;
static const int s390_hotpatch_trampoline_halfwords_max = 1000000;
static int s390_hotpatch_trampoline_halfwords = -1;

/* Return the argument of the given hotpatch attribute or the default value if
   no argument is present. */

static inline int
get_hotpatch_attribute (tree hotpatch_attr)
{
  tree args;

  args = TREE_VALUE (hotpatch_attr);

  return (args) ?
    TREE_INT_CST_LOW (TREE_VALUE (args)) :
    s390_hotpatch_trampoline_halfwords_default;
}
/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid. */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  else if (args)
    {
      tree expr = TREE_VALUE (args);

      if (TREE_CODE (expr) != INTEGER_CST
          || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
          || wi::gtu_p (expr, s390_hotpatch_trampoline_halfwords_max))
        {
          error ("requested %qE attribute is not a non-negative integer"
                 " constant or too large (max. %d)", name,
                 s390_hotpatch_trampoline_halfwords_max);
          *no_add_attrs = true;
        }
    }

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 0, 1, true, false, false, s390_handle_hotpatch_attribute, false
  },
  /* End element. */
  { NULL,       0, 0, false, false, false, NULL, false }
};
/* Return the alignment for LABEL. We default to the -falign-labels
   value except for the literal pool base label. */
int
s390_label_align (rtx label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels. */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE. */

static bool
s390_scalar_mode_supported_p (enum machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance. */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE. */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both. Otherwise, return
   VOIDmode. */

static enum machine_mode
s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;
      return VOIDmode;

    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE. */

static bool
s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
{
  enum machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
      if (req_mode != set_mode)
        return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode)
        return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
        return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false. */

bool
s390_match_ccmode (rtx_insn *insn, enum machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure. */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that. Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM). */

enum machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_DOUBLE as well. */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2 */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison. */

enum machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM. */
          enum machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial. */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
         -INT_MAX is used as parameter, which stays negative. So
         we have an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for comparisons. */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero. Knowing the sign of the
         constant the overflow behavior gets predictable. e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit. */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP((op0), 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through. */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently. */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }

  /* Narrow AND of memory against immediate to enable TM. */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs. */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible. */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case CCZmode:
        case CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand. */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }

  /* Simplify cascaded EQ, NE with const0_rtx. */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand. */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }
}
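
/* Worked example (editor's note, not part of the original sources): the
   ZERO_EXTRACT conversion above turns

     (eq (zero_extract:SI (reg) (const_int 2) (const_int 1)) (const_int 0))

   with modesize 32, len 2 and pos 1 into an AND against
   block = ((1 << 2) - 1) << (32 - 1 - 2) = 0x60000000, i.e.

     (eq (and:SI (reg) (const_int 0x60000000)) (const_int 0))

   which the test-under-mask (TM) patterns can then match. */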
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1. Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result. */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  /* Do not output a redundant compare instruction if a compare_and_swap
     pattern already computed the result and the machine modes are compatible. */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}

/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result. */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
                            const0_rtx);
}

/* Emit a jump instruction to TARGET and return it. If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND. */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE. Return -1 for invalid comparisons. */

static int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
              || (GET_MODE (XEXP (code, 0)) == CCRAWmode
                  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC1;
        case NE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT2mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC2;
        case NE: return CC0 | CC1 | CC3;
        default: return -1;
        }
      break;

    case CCT3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC3;
        case NE: return CC0 | CC1 | CC2;
        default: return -1;
        }
      break;

    case CCLmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        default: return -1;
        }
      break;

    case CCL1mode:
      switch (GET_CODE (code))
        {
        case LTU: return CC2 | CC3;  /* carry */
        case GEU: return CC0 | CC1;  /* no carry */
        default: return -1;
        }
      break;

    case CCL2mode:
      switch (GET_CODE (code))
        {
        case GTU: return CC0 | CC1;  /* borrow */
        case LEU: return CC2 | CC3;  /* no borrow */
        default: return -1;
        }
      break;

    case CCL3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        case LTU: return CC1;
        case GTU: return CC3;
        case LEU: return CC1 | CC2;
        case GEU: return CC2 | CC3;
        default: return -1;
        }

    case CCUmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LTU: return CC1;
        case GTU: return CC2;
        case LEU: return CC0 | CC1;
        case GEU: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCURmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC2 | CC1 | CC3;
        case LTU: return CC2;
        case GTU: return CC1;
        case LEU: return CC0 | CC2;
        case GEU: return CC0 | CC1;
        default: return -1;
        }
      break;

    case CCAPmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1 | CC3;
        case GT: return CC2;
        case LE: return CC0 | CC1 | CC3;
        case GE: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCANmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2 | CC3;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCSmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2;
        case UNORDERED: return CC3;
        case ORDERED: return CC0 | CC1 | CC2;
        case UNEQ: return CC0 | CC3;
        case UNLT: return CC1 | CC3;
        case UNGT: return CC2 | CC3;
        case UNLE: return CC0 | CC1 | CC3;
        case UNGE: return CC0 | CC2 | CC3;
        case LTGT: return CC1 | CC2;
        default: return -1;
        }
      break;

    case CCSRmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC2 | CC1 | CC3;
        case LT: return CC2;
        case GT: return CC1;
        case LE: return CC0 | CC2;
        case GE: return CC0 | CC1;
        case UNORDERED: return CC3;
        case ORDERED: return CC0 | CC2 | CC1;
        case UNEQ: return CC0 | CC3;
        case UNLT: return CC2 | CC3;
        case UNGT: return CC1 | CC3;
        case UNLE: return CC0 | CC2 | CC3;
        case UNGE: return CC0 | CC1 | CC3;
        case LTGT: return CC2 | CC1;
        default: return -1;
        }
      break;

    case CCRAWmode:
      switch (GET_CODE (code))
        {
        case EQ:
          return INTVAL (XEXP (code, 1));
        case NE:
          return (INTVAL (XEXP (code, 1))) ^ 0xf;
        default:
          return -1;
        }

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE. Return -1 for invalid comparisons. */

static int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE. If INV is true, return mnemonic
   for the corresponding inverted branch. */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
          || (GET_MODE (XEXP (code, 0)) == CCRAWmode
              && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 0xf;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
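
/* Worked example (editor's note, not part of the original sources):
   for a CCUmode GEU test, s390_branch_condition_mask returns
   CC0 | CC2 == (1 << 3) | (1 << 1) == 10, so mnemonic[10] yields "he"
   (branch on high or equal); with INV set the mask becomes
   10 ^ 0xf == 5 and mnemonic[5] yields the inverted condition "nhe". */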
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part. */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, enum machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
        return value & part_mask;
    }

  gcc_unreachable ();
}
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part. Otherwise, return -1. */

int
s390_single_part (rtx op,
                  enum machine_mode mode,
                  enum machine_mode part_mode,
                  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
        {
          if (part != -1)
            return -1;
          else
            part = i;
        }
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
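
/* Worked example (editor's note, not part of the original sources):
   s390_single_part (GEN_INT (0xffff), DImode, HImode, 0) returns 3:
   only the lowest HImode part differs from DEF == 0, and parts are
   numbered starting with 0 at the most significant end. */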
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in IN. POS and LENGTH can be used
   to obtain the start position and the length of the bitfield.

   POS gives the position of the first bit of the bitfield counting
   from the lowest order bit starting with zero. In order to use this
   value for S/390 instructions this has to be converted to "bits big
   endian" style. */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
                           int *pos, int *length)
{
  int tmp_pos = 0;
  int tmp_length = 0;
  int i;
  unsigned HOST_WIDE_INT mask = 1ULL;
  bool contiguous = false;

  for (i = 0; i < size; mask <<= 1, i++)
    {
      if (contiguous)
        {
          if (mask & in)
            tmp_length++;
          else
            break;
        }
      else
        {
          if (mask & in)
            {
              contiguous = true;
              tmp_length++;
            }
          else
            tmp_pos++;
        }
    }

  if (!tmp_length)
    return false;

  /* Calculate a mask for all bits beyond the contiguous bits. */
  mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));

  if (mask & in)
    return false;

  if (tmp_length + tmp_pos - 1 > size)
    return false;

  if (length)
    *length = tmp_length;

  if (pos)
    *pos = tmp_pos;

  return true;
}
/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND. In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap. Negative values
   for ROTL indicate a rotate to the right. */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int pos, len;
  bool ok;

  ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
  gcc_assert (ok);

  return ((rotl >= 0 && rotl <= pos)
          || (rotl < 0 && -rotl <= bitsize - len - pos));
}
/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first. */

bool
s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
{
  /* Floating point registers cannot be split. */
  if (FP_REG_P (src) || FP_REG_P (dst))
    return false;

  /* We don't need to split if operands are directly accessible. */
  if (s_operand (src, mode) || s_operand (dst, mode))
    return false;

  /* Non-offsettable memory references cannot be split. */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword. */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
        return false;
    }

  return true;
}
/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise. */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block operations.
     Overlapping operations would otherwise be recognized by the S/390 hardware
     and would fall back to a slower implementation. Allowing overlapping
     operations would lead to slow code but not to wrong code. Therefore we are
     somewhat optimistic if we cannot prove that the memory blocks are
     overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas. */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}
/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA. Return true if
   we can prove this to be the case, false otherwise. */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}
/* Expand logical operator CODE in mode MODE with operands OPERANDS. */

void
s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
                              rtx *operands)
{
  enum machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register. */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory. Otherwise perform the operation in SImode. */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required. */
  if (mode != wmode)
    {
      if (GET_CODE (dst) == SUBREG
          && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
        dst = tem;
      else if (REG_P (dst))
        dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
        dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
          && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
        src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
        src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
          && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
        src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
        src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction. */
  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed. */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}
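
/* Sketch (editor's note, not part of the original sources): a QImode
   XOR with a register destination has no QImode insn pattern, so the
   code above widens it: the operation is emitted as an SImode XOR on
   (paradoxical) subregs of the operands, with the CC register clobber
   added, and the destination is fixed up afterwards with a lowpart
   move when a temporary register had to be used. */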
/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands. After reload, it has to be
     the first source operand. */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
           || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}
/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions. */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
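
/* Worked example (editor's note, not part of the original sources): for

     (and:SI (mem:SI X) (const_int 0xffffff00))

   DEF is -1 and the only QImode part of the immediate different from
   0xff is the lowest byte, i.e. part 3 in big-endian byte numbering.
   The operation can therefore be performed as a single-byte NI on the
   memory byte at X+3 with mask 0x00 instead of an SS-type NC. */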
/* How to allocate a 'struct machine_function'. */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* Map for smallest class containing reg regno. */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,
  ACCESS_REGS,  ACCESS_REGS
};
/* Return the attribute type of insn. */

static enum attr_type
s390_safe_attr_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}
/* Return true if DISP is a valid short displacement. */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK. */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement. */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range. */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large. */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
          || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small. */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}
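
/* Background (editor's note, not part of the original sources): short
   displacements are the unsigned 12-bit d field of the classic
   instruction formats (0..4095), while the long displacement facility
   adds formats with a signed 20-bit field (-524288..524287), e.g. LY
   alongside L. */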
/* Decompose an RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise. If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized. */

static bool
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below. However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register. This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose. */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement. */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
        {
          if (code1 == REG || code1 == UNSPEC)
            {
              indx = op0;       /* index + base */
              base = op1;
            }
          else
            {
              base = op0;       /* base + displacement */
              disp = op1;
            }
        }

      else if (code0 == PLUS)
        {
          indx = XEXP (op0, 0); /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }

      else
        {
          return false;
        }
    }

  else
    disp = addr;                /* displacement */

  /* Extract integer part of displacement. */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
        {
          offset = INTVAL (disp);
          disp = NULL_RTX;
        }
      else if (GET_CODE (disp) == CONST
               && GET_CODE (XEXP (disp, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
        {
          offset = INTVAL (XEXP (XEXP (disp, 0), 1));
          disp = XEXP (XEXP (disp, 0), 0);
        }
    }

  /* Strip off CONST here to avoid special case tests later. */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register. */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      /* Either base or index must be free to hold the base register. */
      if (!base)
        base = fake_pool_base, literal_pool = true;
      else if (!indx)
        indx = fake_pool_base, literal_pool = true;
      else
        return false;

      /* Mark up the displacement. */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
                             UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register. */
  if (base)
    {
      if (GET_CODE (base) == UNSPEC)
        switch (XINT (base, 1))
          {
          case UNSPEC_LTREF:
            if (!disp)
              disp = gen_rtx_UNSPEC (Pmode,
                                     gen_rtvec (1, XVECEXP (base, 0, 0)),
                                     UNSPEC_LTREL_OFFSET);
            else
              return false;

            base = XVECEXP (base, 0, 1);
            break;

          case UNSPEC_LTREL_BASE:
            if (XVECLEN (base, 0) == 1)
              base = fake_pool_base, literal_pool = true;
            else
              base = XVECEXP (base, 0, 1);
            break;

          default:
            return false;
          }

      if (!REG_P (base)
          || (GET_MODE (base) != SImode
              && GET_MODE (base) != Pmode))
        return false;

      if (REGNO (base) == STACK_POINTER_REGNUM
          || REGNO (base) == FRAME_POINTER_REGNUM
          || ((reload_completed || reload_in_progress)
              && frame_pointer_needed
              && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
          || REGNO (base) == ARG_POINTER_REGNUM
          || (flag_pic
              && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
        pointer = base_ptr = true;

      if ((reload_completed || reload_in_progress)
          && base == cfun->machine->base_reg)
        pointer = base_ptr = literal_pool = true;
    }

  /* Validate index register. */
  if (indx)
    {
      if (GET_CODE (indx) == UNSPEC)
        switch (XINT (indx, 1))
          {
          case UNSPEC_LTREF:
            if (!disp)
              disp = gen_rtx_UNSPEC (Pmode,
                                     gen_rtvec (1, XVECEXP (indx, 0, 0)),
                                     UNSPEC_LTREL_OFFSET);
            else
              return false;

            indx = XVECEXP (indx, 0, 1);
            break;

          case UNSPEC_LTREL_BASE:
            if (XVECLEN (indx, 0) == 1)
              indx = fake_pool_base, literal_pool = true;
            else
              indx = XVECEXP (indx, 0, 1);
            break;

          default:
            return false;
          }

      if (!REG_P (indx)
          || (GET_MODE (indx) != SImode
              && GET_MODE (indx) != Pmode))
        return false;

      if (REGNO (indx) == STACK_POINTER_REGNUM
          || REGNO (indx) == FRAME_POINTER_REGNUM
          || ((reload_completed || reload_in_progress)
              && frame_pointer_needed
              && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
          || REGNO (indx) == ARG_POINTER_REGNUM
          || (flag_pic
              && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
        pointer = indx_ptr = true;

      if ((reload_completed || reload_in_progress)
          && indx == cfun->machine->base_reg)
        pointer = indx_ptr = literal_pool = true;
    }

  /* Prefer to use pointer as base, not index. */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      rtx tmp = base;
      base = indx;
      indx = tmp;
    }

  /* Validate displacement. */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
         anyway as the virtual registers get eliminated. This could make a
         valid displacement invalid, but it is more likely to make an invalid
         displacement valid, because we sometimes access the register save area
         via negative offsets to one of those registers.
         Thus we don't check the displacement for validity here. If after
         elimination the displacement turns out to be invalid after all,
         this is fixed up by reload in any case. */
      /* LRA maintains always displacements up to date and we need to
         know the displacement is right during all LRA not only at the
         final elimination. */
      if (lra_in_progress
          || (base != arg_pointer_rtx
              && indx != arg_pointer_rtx
              && base != return_address_pointer_rtx
              && indx != return_address_pointer_rtx
              && base != frame_pointer_rtx
              && indx != frame_pointer_rtx
              && base != virtual_stack_vars_rtx
              && indx != virtual_stack_vars_rtx))
        if (!DISP_IN_RANGE (offset))
          return false;
    }
  else
    {
      /* All the special cases are pointers. */
      pointer = true;

      /* In the small-PIC case, the linker converts @GOT
         and @GOTNTPOFF offsets to possible displacements. */
      if (GET_CODE (disp) == UNSPEC
          && (XINT (disp, 1) == UNSPEC_GOT
              || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
          && flag_pic == 1)
        {
          ;
        }

      /* Accept pool label offsets. */
      else if (GET_CODE (disp) == UNSPEC
               && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
        ;

      /* Accept literal pool references. */
      else if (GET_CODE (disp) == UNSPEC
               && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
        {
          /* In case CSE pulled a non literal pool reference out of
             the pool we have to reject the address. This is
             especially important when loading the GOT pointer on non
             zarch CPUs. In this case the literal pool contains an lt
             relative offset to the _GLOBAL_OFFSET_TABLE_ label which
             will most likely exceed the displacement. */
          if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
              || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
            return false;

          orig_disp = gen_rtx_CONST (Pmode, disp);
          if (offset)
            {
              /* If we have an offset, make sure it does not
                 exceed the size of the constant pool entry. */
              rtx sym = XVECEXP (disp, 0, 0);
              if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
                return false;

              orig_disp = plus_constant (Pmode, orig_disp, offset);
            }
        }

      else
        return false;
    }

  if (!base && !indx)
    pointer = true;

  if (out)
    {
      out->base = base;
      out->indx = indx;
      out->disp = orig_disp;
      out->pointer = pointer;
      out->literal_pool = literal_pool;
    }

  return true;
}
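
/* Worked example (editor's note, not part of the original sources):
   decomposing (plus (reg %r2) (const_int 100)) yields out->base == %r2,
   out->indx == NULL_RTX and out->disp == (const_int 100); the integer
   offset 100 is range-checked against DISP_IN_RANGE during validation. */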
/* Decompose an RTL expression OP for a shift count into its components,
   and return the base register in BASE and the offset in OFFSET.

   Return true if OP is a valid shift count, false if not. */

bool
s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off = 0;

  /* We can have an integer constant, an address register,
     or a sum of the two. */
  if (GET_CODE (op) == CONST_INT)
    {
      off = INTVAL (op);
      op = NULL_RTX;
    }
  if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
    {
      off = INTVAL (XEXP (op, 1));
      op = XEXP (op, 0);
    }
  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (op && GET_CODE (op) != REG)
    return false;

  if (offset)
    *offset = off;
  if (base)
    *base = op;

  return true;
}
/* Return true if OP is a valid address without index. */

int
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}
/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction. Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer. Return these parts in SYMREF and ADDEND. You can
   pass NULL in SYMREF and/or ADDEND if you are not interested in these
   values. Literal pool references are *not* considered symbol
   references. */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
      || (GET_CODE (addr) == UNSPEC
          && (XINT (addr, 1) == UNSPEC_GOTENT
              || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
    {
      if (symref)
        *symref = addr;
      if (addend)
        *addend = tmpaddend;

      return true;
    }
  return false;
}
/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted. Only the Q, R, S, T constraint
   letters are allowed for C. */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  struct s390_address addr;
  bool decomposed = false;

  /* This check makes sure that no symbolic addresses (except literal
     pool references) are accepted by the R or T constraints. */
  if (s390_loadrelative_operand_p (op, NULL, NULL))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
        return 0;
      if (addr.literal_pool)
        return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
        return 0;
      if (addr.indx)
        return 0;
      if (!s390_short_displacement (addr.disp))
        return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
        {
          if (!decomposed && !s390_decompose_address (op, &addr))
            return 0;
          if (!s390_short_displacement (addr.disp))
            return 0;
        }
      /* Any invalid address here will be fixed up by reload,
         so accept it for the most generic constraint. */
      break;

    case 'S': /* no index long displacement */
      if (!TARGET_LONG_DISPLACEMENT)
        return 0;
      if (!decomposed && !s390_decompose_address (op, &addr))
        return 0;
      if (addr.indx)
        return 0;
      if (s390_short_displacement (addr.disp))
        return 0;
      break;

    case 'T': /* with index long displacement */
      if (!TARGET_LONG_DISPLACEMENT)
        return 0;
      /* Any invalid address here will be fixed up by reload,
         so accept it for the most generic constraint. */
      if ((decomposed || s390_decompose_address (op, &addr))
          && s390_short_displacement (addr.disp))
        return 0;
      break;

    default:
      return 0;
    }
  return 1;
}
2231 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2232 the constraint given in STR, or 0 else. */
2235 s390_mem_constraint (const char *str, rtx op)
2242 /* Check for offsettable variants of memory constraints. */
2243 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2245 if ((reload_completed || reload_in_progress)
2246 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2248 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2250 /* Check for non-literal-pool variants of memory constraints. */
2253 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2258 if (GET_CODE (op) != MEM)
2260 return s390_check_qrst_address (c, XEXP (op, 0), true);
2262 return (s390_check_qrst_address ('Q', op, true)
2263 || s390_check_qrst_address ('R', op, true));
2265 return (s390_check_qrst_address ('S', op, true)
2266 || s390_check_qrst_address ('T', op, true));
2268 /* Simply check for the basic form of a shift count. Reload will
2269 take care of making sure we have a proper base register. */
2270 if (!s390_decompose_shift_count (op, NULL, NULL))
2274 return s390_check_qrst_address (str[1], op, true);
/* Evaluates constraint strings starting with letter O. Input
   parameter C is the second letter following the "O" in the constraint
   string. Returns 1 if VALUE meets the respective constraint and 0
   otherwise. */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
        || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}
/* Evaluates constraint strings starting with letter N. Parameter STR
   contains the letters following letter "N" in the constraint string.
   Returns true if VALUE matches the constraint. */

bool
s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
{
  enum machine_mode mode, part_mode;
  int def;
  int part, part_goal;

  if (str[0] == 'x')
    part_goal = -1;
  else
    part_goal = str[0] - '0';

  switch (str[1])
    {
    case 'Q':
      part_mode = QImode;
      break;
    case 'H':
      part_mode = HImode;
      break;
    case 'S':
      part_mode = SImode;
      break;
    default:
      return 0;
    }

  switch (str[2])
    {
    case 'H':
      mode = HImode;
      break;
    case 'S':
      mode = SImode;
      break;
    case 'D':
      mode = DImode;
      break;
    default:
      return 0;
    }

  switch (str[3])
    {
    case '0':
      def = 0;
      break;
    case 'F':
      def = -1;
      break;
    default:
      return 0;
    }

  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
    return 0;

  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
  if (part < 0)
    return 0;
  if (part_goal != -1 && part_goal != part)
    return 0;

  return true;
}
/* Returns true if the input parameter VALUE is a float zero. */

static bool
s390_float_const_zero_p (rtx value)
{
  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
          && value == CONST0_RTX (GET_MODE (value)));
}
/* Implement TARGET_REGISTER_MOVE_COST. */

static int
s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  /* On s390, copy between fprs and gprs is expensive as long as no
     ldgr/lgdr can be used. */
  if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
      && ((reg_classes_intersect_p (from, GENERAL_REGS)
           && reg_classes_intersect_p (to, FP_REGS))
          || (reg_classes_intersect_p (from, FP_REGS)
              && reg_classes_intersect_p (to, GENERAL_REGS))))
    return 10;

  return 1;
}
/* Implement TARGET_MEMORY_MOVE_COST. */

static int
s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                       reg_class_t rclass ATTRIBUTE_UNUSED,
                       bool in ATTRIBUTE_UNUSED)
{
  return 1;
}
2420 /* Compute a (partial) cost for rtx X. Return true if the complete
2421 cost has been computed, and false if subexpressions should be
2422 scanned. In either case, *TOTAL contains the cost result.
2423 CODE contains GET_CODE (x), OUTER_CODE contains the code
2424 of the superexpression of x. */
2427 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2428 int *total, bool speed ATTRIBUTE_UNUSED)
2451 *total = COSTS_N_INSNS (1);
2456 *total = COSTS_N_INSNS (1);
2460 switch (GET_MODE (x))
2464 rtx left = XEXP (x, 0);
2465 rtx right = XEXP (x, 1);
2466 if (GET_CODE (right) == CONST_INT
2467 && CONST_OK_FOR_K (INTVAL (right)))
2468 *total = s390_cost->mhi;
2469 else if (GET_CODE (left) == SIGN_EXTEND)
2470 *total = s390_cost->mh;
2472 *total = s390_cost->ms; /* msr, ms, msy */
2477 rtx left = XEXP (x, 0);
2478 rtx right = XEXP (x, 1);
2481 if (GET_CODE (right) == CONST_INT
2482 && CONST_OK_FOR_K (INTVAL (right)))
2483 *total = s390_cost->mghi;
2484 else if (GET_CODE (left) == SIGN_EXTEND)
2485 *total = s390_cost->msgf;
2487 *total = s390_cost->msg; /* msgr, msg */
2489 else /* TARGET_31BIT */
2491 if (GET_CODE (left) == SIGN_EXTEND
2492 && GET_CODE (right) == SIGN_EXTEND)
2493 /* mulsidi case: mr, m */
2494 *total = s390_cost->m;
2495 else if (GET_CODE (left) == ZERO_EXTEND
2496 && GET_CODE (right) == ZERO_EXTEND
2497 && TARGET_CPU_ZARCH)
2498 /* umulsidi case: ml, mlr */
2499 *total = s390_cost->ml;
2501 /* Complex calculation is required. */
2502 *total = COSTS_N_INSNS (40);
2508 *total = s390_cost->mult_df;
2511 *total = s390_cost->mxbr;
2519 switch (GET_MODE (x))
2522 *total = s390_cost->madbr;
2525 *total = s390_cost->maebr;
2530 /* A negate in the third argument is free: FMSUB. */
2531 if (GET_CODE (XEXP (x, 2)) == NEG)
2533 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2534 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2535 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2542 if (GET_MODE (x) == TImode) /* 128 bit division */
2543 *total = s390_cost->dlgr;
2544 else if (GET_MODE (x) == DImode)
2546 rtx right = XEXP (x, 1);
2547 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2548 *total = s390_cost->dlr;
2549 else /* 64 by 64 bit division */
2550 *total = s390_cost->dlgr;
2552 else if (GET_MODE (x) == SImode) /* 32 bit division */
2553 *total = s390_cost->dlr;
2558 if (GET_MODE (x) == DImode)
2560 rtx right = XEXP (x, 1);
2561 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2563 *total = s390_cost->dsgfr;
2565 *total = s390_cost->dr;
2566 else /* 64 by 64 bit division */
2567 *total = s390_cost->dsgr;
2569 else if (GET_MODE (x) == SImode) /* 32 bit division */
2570 *total = s390_cost->dlr;
2571 else if (GET_MODE (x) == SFmode)
2573 *total = s390_cost->debr;
2575 else if (GET_MODE (x) == DFmode)
2577 *total = s390_cost->ddbr;
2579 else if (GET_MODE (x) == TFmode)
2581 *total = s390_cost->dxbr;
2586 if (GET_MODE (x) == SFmode)
2587 *total = s390_cost->sqebr;
2588 else if (GET_MODE (x) == DFmode)
2589 *total = s390_cost->sqdbr;
2591 *total = s390_cost->sqxbr;
2596 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2597 || outer_code == PLUS || outer_code == MINUS
2598 || outer_code == COMPARE)
2603 *total = COSTS_N_INSNS (1);
2604 if (GET_CODE (XEXP (x, 0)) == AND
2605 && GET_CODE (XEXP (x, 1)) == CONST_INT
2606 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2608 rtx op0 = XEXP (XEXP (x, 0), 0);
2609 rtx op1 = XEXP (XEXP (x, 0), 1);
2610 rtx op2 = XEXP (x, 1);
2612 if (memory_operand (op0, GET_MODE (op0))
2613 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2615 if (register_operand (op0, GET_MODE (op0))
2616 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2626 /* Return the cost of an address rtx ADDR. */
2629 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2630 addr_space_t as ATTRIBUTE_UNUSED,
2631 bool speed ATTRIBUTE_UNUSED)
2633 struct s390_address ad;
2634 if (!s390_decompose_address (addr, &ad))
2637 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
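/* Illustrative sketch, not part of the original source: the numbers the
   return value above works out to.  COSTS_N_INSNS (1) expands to 4 in
   GCC's cost scale, so the "+ 1" is a small bias that steers address
   selection toward base+displacement over base+index forms.  */

static int
s390_address_cost_example (int has_index)
{
  /* Mirrors the return above: 4 + 1 with an index register, plain 4
     without one.  */
  return has_index ? 4 + 1 : 4;
}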
2640 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2641 otherwise return 0. */
2644 tls_symbolic_operand (rtx op)
2646 if (GET_CODE (op) != SYMBOL_REF)
2648 return SYMBOL_REF_TLS_MODEL (op);
2651 /* Split DImode access register reference REG (on 64-bit) into its constituent
2652 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2653 gen_highpart cannot be used as they assume all registers are word-sized,
2654 while our access registers have only half that size. */
2657 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2659 gcc_assert (TARGET_64BIT);
2660 gcc_assert (ACCESS_REG_P (reg));
2661 gcc_assert (GET_MODE (reg) == DImode);
2662 gcc_assert (!(REGNO (reg) & 1));
2664 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2665 *hi = gen_rtx_REG (SImode, REGNO (reg));
2668 /* Return true if OP contains a symbol reference. */
2671 symbolic_reference_mentioned_p (rtx op)
2676 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2679 fmt = GET_RTX_FORMAT (GET_CODE (op));
2680 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2686 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2687 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2691 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2698 /* Return true if OP contains a reference to a thread-local symbol. */
2701 tls_symbolic_reference_mentioned_p (rtx op)
2706 if (GET_CODE (op) == SYMBOL_REF)
2707 return tls_symbolic_operand (op);
2709 fmt = GET_RTX_FORMAT (GET_CODE (op));
2710 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2716 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2717 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2721 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2729 /* Return true if OP is a legitimate general operand when
2730 generating PIC code. It is given that flag_pic is on
2731 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2734 legitimate_pic_operand_p (rtx op)
2736 /* Accept all non-symbolic constants. */
2737 if (!SYMBOLIC_CONST (op))
2740 /* Reject everything else; must be handled
2741 via emit_symbolic_move. */
2745 /* Returns true if the constant value OP is a legitimate general operand.
2746 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2749 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2751 /* Accept all non-symbolic constants. */
2752 if (!SYMBOLIC_CONST (op))
2755 /* Accept immediate LARL operands. */
2756 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2759 /* Thread-local symbols are never legal constants. This is
2760 so that emit_call knows that computing such addresses
2761 might require a function call. */
2762 if (TLS_SYMBOLIC_CONST (op))
2765 /* In the PIC case, symbolic constants must *not* be
2766 forced into the literal pool. We accept them here,
2767 so that they will be handled by emit_symbolic_move. */
2771 /* All remaining non-PIC symbolic constants are
2772 forced into the literal pool. */
2776 /* Determine if it's legal to put X into the constant pool. This
2777 is not possible if X contains the address of a symbol that is
2778 not constant (TLS) or not known at final link time (PIC). */
2781 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2783 switch (GET_CODE (x))
2787 /* Accept all non-symbolic constants. */
2791 /* Labels are OK iff we are non-PIC. */
2792 return flag_pic != 0;
2795 /* 'Naked' TLS symbol references are never OK;
2796 non-TLS symbols are OK iff we are non-PIC. */
2797 if (tls_symbolic_operand (x))
2800 return flag_pic != 0;
2803 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2806 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2807 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2810 switch (XINT (x, 1))
2812 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2813 case UNSPEC_LTREL_OFFSET:
2821 case UNSPEC_GOTNTPOFF:
2822 case UNSPEC_INDNTPOFF:
2825 /* If the literal pool shares the code section, execute template
2826 placeholders can be put into the pool as well. */
2828 return TARGET_CPU_ZARCH;
2840 /* Returns true if the constant value OP is a legitimate general
2841 operand during and after reload. The difference to
2842 legitimate_constant_p is that this function will not accept
2843 a constant that would need to be forced to the literal pool
2844 before it can be used as operand.
2845 This function accepts all constants which can be loaded directly into a GPR. */
2849 legitimate_reload_constant_p (rtx op)
2851 /* Accept la(y) operands. */
2852 if (GET_CODE (op) == CONST_INT
2853 && DISP_IN_RANGE (INTVAL (op)))
2856 /* Accept l(g)hi/l(g)fi operands. */
2857 if (GET_CODE (op) == CONST_INT
2858 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2861 /* Accept lliXX operands. */
2863 && GET_CODE (op) == CONST_INT
2864 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2865 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2869 && GET_CODE (op) == CONST_INT
2870 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2871 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2874 /* Accept larl operands. */
2875 if (TARGET_CPU_ZARCH
2876 && larl_operand (op, VOIDmode))
2879 /* Accept floating-point zero operands that fit into a single GPR. */
2880 if (GET_CODE (op) == CONST_DOUBLE
2881 && s390_float_const_zero_p (op)
2882 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2885 /* Accept double-word operands that can be split. */
2886 if (GET_CODE (op) == CONST_INT
2887 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2889 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2890 rtx hi = operand_subword (op, 0, 0, dword_mode);
2891 rtx lo = operand_subword (op, 1, 0, dword_mode);
2892 return legitimate_reload_constant_p (hi)
2893 && legitimate_reload_constant_p (lo);
2896 /* Everything else cannot be handled without reload. */
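/* Illustrative sketch, not part of the original source: the double-word
   split above, with a hypothetical constant and word_mode == SImode
   (31-bit).  The constant is accepted iff both word_mode halves pass
   the immediate checks earlier in this function.  */

static int
dword_split_example (void)
{
  long long op = 0x0001000200030004LL;  /* not loadable in one insn */
  int hi = (int) (op >> 32);            /* operand_subword 0: 0x00010002 */
  int lo = (int) op;                    /* operand_subword 1: 0x00030004 */
  return hi == 0x00010002 && lo == 0x00030004;
}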
2900 /* Returns true if the constant value OP is a legitimate fp operand
2901 during and after reload.
2902 This function accepts all constants which can be loaded directly into an FPR. */
2906 legitimate_reload_fp_constant_p (rtx op)
2908 /* Accept floating-point zero operands if the load zero instruction
2909 can be used. Prior to z196 the load fp zero instruction caused a
2910 performance penalty if the result is used as a BFP number. */
2912 && GET_CODE (op) == CONST_DOUBLE
2913 && s390_float_const_zero_p (op))
2919 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2920 return the class of reg to actually use. */
2923 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2925 switch (GET_CODE (op))
2927 /* Constants we cannot reload into general registers
2928 must be forced into the literal pool. */
2931 if (reg_class_subset_p (GENERAL_REGS, rclass)
2932 && legitimate_reload_constant_p (op))
2933 return GENERAL_REGS;
2934 else if (reg_class_subset_p (ADDR_REGS, rclass)
2935 && legitimate_reload_constant_p (op))
2937 else if (reg_class_subset_p (FP_REGS, rclass)
2938 && legitimate_reload_fp_constant_p (op))
2942 /* If a symbolic constant or a PLUS is reloaded,
2943 it is most likely being used as an address, so
2944 prefer ADDR_REGS. If 'class' is not a superset
2945 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2947 /* Symrefs cannot be pushed into the literal pool with -fPIC,
2948 so we *MUST NOT* return NO_REGS for these cases
2949 (s390_cannot_force_const_mem will return true).
2951 On the other hand we MUST return NO_REGS for symrefs with
2952 an invalid addend which might have been pushed to the literal
2953 pool (no -fPIC). Usually we would expect them to be
2954 handled via secondary reload but this does not happen if
2955 they are used as literal pool slot replacement in reload
2956 inheritance (see emit_input_reload_insns). */
2957 if (TARGET_CPU_ZARCH
2958 && GET_CODE (XEXP (op, 0)) == PLUS
2959 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
2960 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
2962 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
2970 if (!legitimate_reload_constant_p (op))
2974 /* load address will be used. */
2975 if (reg_class_subset_p (ADDR_REGS, rclass))
2987 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
2988 multiple of ALIGNMENT and the SYMBOL_REF being naturally aligned. */
2992 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
2994 HOST_WIDE_INT addend;
2997 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3000 if (addend & (alignment - 1))
3003 if (GET_CODE (symref) == SYMBOL_REF
3004 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3007 if (GET_CODE (symref) == UNSPEC
3008 && alignment <= UNITS_PER_LONG)
3014 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3015 operand, SCRATCH is used to reload the even part of the address; the odd remainder is then added with LA. */
3019 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3021 HOST_WIDE_INT addend;
3024 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3028 /* Easy case. The addend is even so larl will do fine. */
3029 emit_move_insn (reg, addr);
3032 /* We can leave the scratch register untouched if the target
3033 register is a valid base register. */
3034 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3035 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3038 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3039 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3042 emit_move_insn (scratch,
3043 gen_rtx_CONST (Pmode,
3044 gen_rtx_PLUS (Pmode, symref,
3045 GEN_INT (addend - 1))));
3047 emit_move_insn (scratch, symref);
3049 /* Increment the address using la in order to avoid clobbering cc. */
3050 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3054 /* Generate what is necessary to move between REG and MEM using
3055 SCRATCH. The direction is given by TOMEM. */
3058 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3060 /* Reload might have pulled a constant out of the literal pool.
3061 Force it back in. */
3062 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3063 || GET_CODE (mem) == CONST)
3064 mem = force_const_mem (GET_MODE (reg), mem);
3066 gcc_assert (MEM_P (mem));
3068 /* For a load from memory we can leave the scratch register
3069 untouched if the target register is a valid base register. */
3071 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3072 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3073 && GET_MODE (reg) == GET_MODE (scratch))
3076 /* Load address into scratch register. Since we can't have a
3077 secondary reload for a secondary reload we have to cover the case
3078 where larl would need a secondary reload here as well. */
3079 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3081 /* Now we can use a standard load/store to do the move. */
3083 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3085 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3088 /* Inform reload about cases where moving X with a mode MODE to a register in
3089 RCLASS requires an extra scratch or immediate register. Return the class
3090 needed for the immediate register. */
3093 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3094 enum machine_mode mode, secondary_reload_info *sri)
3096 enum reg_class rclass = (enum reg_class) rclass_i;
3098 /* Intermediate register needed. */
3099 if (reg_classes_intersect_p (CC_REGS, rclass))
3100 return GENERAL_REGS;
3104 HOST_WIDE_INT offset;
3107 /* On z10 several optimizer steps may generate larl operands with an odd addend. */
3110 && s390_loadrelative_operand_p (x, &symref, &offset)
3112 && !SYMBOL_REF_ALIGN1_P (symref)
3113 && (offset & 1) == 1)
3114 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3115 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3117 /* On z10 we need a scratch register when moving QI, TI or floating
3118 point mode values from or to a memory location with a SYMBOL_REF
3119 or if the symref addend of a SI or DI move is not aligned to the
3120 width of the access. */
3122 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3123 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3124 || (!TARGET_ZARCH && mode == DImode)
3125 || ((mode == HImode || mode == SImode || mode == DImode)
3126 && (!s390_check_symref_alignment (XEXP (x, 0),
3127 GET_MODE_SIZE (mode))))))
3129 #define __SECONDARY_RELOAD_CASE(M,m) \
3132 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3133 CODE_FOR_reload##m##di_tomem_z10; \
3135 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3136 CODE_FOR_reload##m##si_tomem_z10; \
3139 switch (GET_MODE (x))
3141 __SECONDARY_RELOAD_CASE (QI, qi);
3142 __SECONDARY_RELOAD_CASE (HI, hi);
3143 __SECONDARY_RELOAD_CASE (SI, si);
3144 __SECONDARY_RELOAD_CASE (DI, di);
3145 __SECONDARY_RELOAD_CASE (TI, ti);
3146 __SECONDARY_RELOAD_CASE (SF, sf);
3147 __SECONDARY_RELOAD_CASE (DF, df);
3148 __SECONDARY_RELOAD_CASE (TF, tf);
3149 __SECONDARY_RELOAD_CASE (SD, sd);
3150 __SECONDARY_RELOAD_CASE (DD, dd);
3151 __SECONDARY_RELOAD_CASE (TD, td);
3156 #undef __SECONDARY_RELOAD_CASE
3160 /* We need a scratch register when loading a PLUS expression which
3161 is not a legitimate operand of the LOAD ADDRESS instruction. */
3162 /* LRA can deal with the transformation of a plus op very well, so we
3163 don't need to prompt LRA in this case. */
3164 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
3165 sri->icode = (TARGET_64BIT ?
3166 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3168 /* When performing a multiword move from or to memory, we have to make sure the
3169 second chunk in memory is addressable without causing a displacement
3170 overflow. If that would be the case we calculate the address in
3171 a scratch register. */
3173 && GET_CODE (XEXP (x, 0)) == PLUS
3174 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3175 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3176 + GET_MODE_SIZE (mode) - 1))
3178 /* For GENERAL_REGS a displacement overflow is no problem if occurring
3179 in an s_operand address, since we may fall back to lm/stm. So we only
3180 have to care about overflows in the b+i+d case. */
3181 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3182 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3183 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3184 /* For FP_REGS no lm/stm is available so this check is triggered
3185 for displacement overflows in b+i+d and b+d like addresses. */
3186 || (reg_classes_intersect_p (FP_REGS, rclass)
3187 && s390_class_max_nregs (FP_REGS, mode) > 1))
3190 sri->icode = (TARGET_64BIT ?
3191 CODE_FOR_reloaddi_nonoffmem_in :
3192 CODE_FOR_reloadsi_nonoffmem_in);
3194 sri->icode = (TARGET_64BIT ?
3195 CODE_FOR_reloaddi_nonoffmem_out :
3196 CODE_FOR_reloadsi_nonoffmem_out);
3200 /* A scratch address register is needed when a symbolic constant is
3201 copied to r0 when compiling with -fPIC. In other cases the target
3202 register might be used as a temporary (see legitimize_pic_address). */
3203 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3204 sri->icode = (TARGET_64BIT ?
3205 CODE_FOR_reloaddi_PIC_addr :
3206 CODE_FOR_reloadsi_PIC_addr);
3208 /* Either scratch or no register needed. */
3212 /* Generate code to load SRC, which is PLUS that is not a
3213 legitimate operand for the LA instruction, into TARGET.
3214 SCRATCH may be used as scratch register. */
3217 s390_expand_plus_operand (rtx target, rtx src,
3221 struct s390_address ad;
3223 /* src must be a PLUS; get its two operands. */
3224 gcc_assert (GET_CODE (src) == PLUS);
3225 gcc_assert (GET_MODE (src) == Pmode);
3227 /* Check if either of the two operands is already scheduled
3228 for replacement by reload. This can happen e.g. when
3229 float registers occur in an address. */
3230 sum1 = find_replacement (&XEXP (src, 0));
3231 sum2 = find_replacement (&XEXP (src, 1));
3232 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3234 /* If the address is already strictly valid, there's nothing to do. */
3235 if (!s390_decompose_address (src, &ad)
3236 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3237 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3239 /* Otherwise, one of the operands cannot be an address register;
3240 we reload its value into the scratch register. */
3241 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3243 emit_move_insn (scratch, sum1);
3246 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3248 emit_move_insn (scratch, sum2);
3252 /* According to the way these invalid addresses are generated
3253 in reload.c, it should never happen (at least on s390) that
3254 *neither* of the PLUS components, after find_replacements
3255 was applied, is an address register. */
3256 if (sum1 == scratch && sum2 == scratch)
3262 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3265 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3266 is only ever performed on addresses, so we can mark the
3267 sum as legitimate for LA in any case. */
3268 s390_load_address (target, src);
3272 /* Return true if ADDR is a valid memory address.
3273 STRICT specifies whether strict register checking applies. */
3276 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3278 struct s390_address ad;
3281 && larl_operand (addr, VOIDmode)
3282 && (mode == VOIDmode
3283 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3286 if (!s390_decompose_address (addr, &ad))
3291 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3294 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3300 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3301 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3305 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3306 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3312 /* Return true if OP is a valid operand for the LA instruction.
3313 In 31-bit, we need to prove that the result is used as an
3314 address, as LA performs only a 31-bit addition. */
3317 legitimate_la_operand_p (rtx op)
3319 struct s390_address addr;
3320 if (!s390_decompose_address (op, &addr))
3323 return (TARGET_64BIT || addr.pointer);
3326 /* Return true if it is valid *and* preferable to use LA to
3327 compute the sum of OP1 and OP2. */
3330 preferred_la_operand_p (rtx op1, rtx op2)
3332 struct s390_address addr;
3334 if (op2 != const0_rtx)
3335 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3337 if (!s390_decompose_address (op1, &addr))
3339 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3341 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3344 /* Avoid LA instructions with index register on z196; it is
3345 preferable to use regular add instructions when possible.
3346 Starting with zEC12 the la with index register is "uncracked" again. */
3348 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3351 if (!TARGET_64BIT && !addr.pointer)
3357 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3358 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3364 /* Emit a forced load-address operation to load SRC into DST.
3365 This will use the LOAD ADDRESS instruction even in situations
3366 where legitimate_la_operand_p (SRC) returns false. */
3369 s390_load_address (rtx dst, rtx src)
3372 emit_move_insn (dst, src);
3374 emit_insn (gen_force_la_31 (dst, src));
3377 /* Return a legitimate reference for ORIG (an address) using the
3378 register REG. If REG is 0, a new pseudo is generated.
3380 There are two types of references that must be handled:
3382 1. Global data references must load the address from the GOT, via
3383 the PIC reg. An insn is emitted to do this load, and the reg is
3386 2. Static data references, constant pool addresses, and code labels
3387 compute the address as an offset from the GOT, whose base is in
3388 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3389 differentiate them from global data objects. The returned
3390 address is the PIC reg + an unspec constant.
3392 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3393 reg also appears in the address. */
3396 legitimize_pic_address (rtx orig, rtx reg)
3399 rtx addend = const0_rtx;
3402 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3404 if (GET_CODE (addr) == CONST)
3405 addr = XEXP (addr, 0);
3407 if (GET_CODE (addr) == PLUS)
3409 addend = XEXP (addr, 1);
3410 addr = XEXP (addr, 0);
3413 if ((GET_CODE (addr) == LABEL_REF
3414 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3415 || (GET_CODE (addr) == UNSPEC &&
3416 (XINT (addr, 1) == UNSPEC_GOTENT
3417 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3418 && GET_CODE (addend) == CONST_INT)
3420 /* This can be locally addressed. */
3422 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3423 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3424 gen_rtx_CONST (Pmode, addr) : addr);
3426 if (TARGET_CPU_ZARCH
3427 && larl_operand (const_addr, VOIDmode)
3428 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3429 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3431 if (INTVAL (addend) & 1)
3433 /* LARL can't handle odd offsets, so emit a pair of LARL and LA. */
3435 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3437 if (!DISP_IN_RANGE (INTVAL (addend)))
3439 HOST_WIDE_INT even = INTVAL (addend) - 1;
3440 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3441 addr = gen_rtx_CONST (Pmode, addr);
3442 addend = const1_rtx;
3445 emit_move_insn (temp, addr);
3446 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3450 s390_load_address (reg, new_rtx);
3456 /* If the offset is even, we can just use LARL. This
3457 will happen automatically. */
3462 /* No larl - Access local symbols relative to the GOT. */
3464 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3466 if (reload_in_progress || reload_completed)
3467 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3469 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3470 if (addend != const0_rtx)
3471 addr = gen_rtx_PLUS (Pmode, addr, addend);
3472 addr = gen_rtx_CONST (Pmode, addr);
3473 addr = force_const_mem (Pmode, addr);
3474 emit_move_insn (temp, addr);
3476 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3479 s390_load_address (reg, new_rtx);
3484 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3486 /* A non-local symbol reference without addend.
3488 The symbol ref is wrapped into an UNSPEC to make sure the
3489 proper operand modifier (@GOT or @GOTENT) will be emitted.
3490 This will tell the linker to put the symbol into the GOT.
3492 Additionally the code dereferencing the GOT slot is emitted here.
3494 An addend to the symref needs to be added afterwards.
3495 legitimize_pic_address calls itself recursively to handle
3496 that case. So no need to do it here. */
3499 reg = gen_reg_rtx (Pmode);
3503 /* Use load relative if possible.
3504 lgrl <target>, sym@GOTENT */
3505 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3507 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3509 emit_move_insn (reg, new_rtx);
3512 else if (flag_pic == 1)
3514 /* Assume GOT offset is a valid displacement operand (< 4k
3515 or < 512k with z990). This is handled the same way in
3516 both 31- and 64-bit code (@GOT).
3517 lg <target>, sym@GOT(r12) */
3519 if (reload_in_progress || reload_completed)
3520 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3522 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3523 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3524 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3525 new_rtx = gen_const_mem (Pmode, new_rtx);
3526 emit_move_insn (reg, new_rtx);
3529 else if (TARGET_CPU_ZARCH)
3531 /* If the GOT offset might be >= 4k, we determine the position
3532 of the GOT entry via a PC-relative LARL (@GOTENT).
3533 larl temp, sym@GOTENT
3534 lg <target>, 0(temp) */
3536 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3538 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3539 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3541 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3542 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3543 emit_move_insn (temp, new_rtx);
3545 new_rtx = gen_const_mem (Pmode, temp);
3546 emit_move_insn (reg, new_rtx);
3552 /* If the GOT offset might be >= 4k, we have to load it
3553 from the literal pool (@GOT).
3555 lg temp, lit-litbase(r13)
3556 lg <target>, 0(temp)
3557 lit: .long sym@GOT */
3559 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3561 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3562 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3564 if (reload_in_progress || reload_completed)
3565 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3567 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3568 addr = gen_rtx_CONST (Pmode, addr);
3569 addr = force_const_mem (Pmode, addr);
3570 emit_move_insn (temp, addr);
3572 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3573 new_rtx = gen_const_mem (Pmode, new_rtx);
3574 emit_move_insn (reg, new_rtx);
3578 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3580 gcc_assert (XVECLEN (addr, 0) == 1);
3581 switch (XINT (addr, 1))
3583 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
3584 (not GOT slots!). In general this will exceed the
3585 displacement range, so these values belong in the literal pool. */
3589 new_rtx = force_const_mem (Pmode, orig);
3592 /* For -fPIC the GOT size might exceed the displacement
3593 range so make sure the value is in the literal pool. */
3596 new_rtx = force_const_mem (Pmode, orig);
3599 /* For @GOTENT larl is used. This is handled like local symbol refs. */
3605 /* @PLT is OK as is on 64-bit, but must be converted to
3606 GOT-relative @PLTOFF on 31-bit. */
3608 if (!TARGET_CPU_ZARCH)
3610 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3612 if (reload_in_progress || reload_completed)
3613 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3615 addr = XVECEXP (addr, 0, 0);
3616 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3618 if (addend != const0_rtx)
3619 addr = gen_rtx_PLUS (Pmode, addr, addend);
3620 addr = gen_rtx_CONST (Pmode, addr);
3621 addr = force_const_mem (Pmode, addr);
3622 emit_move_insn (temp, addr);
3624 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3627 s390_load_address (reg, new_rtx);
3632 /* On 64 bit larl can be used. This case is handled like
3633 local symbol refs. */
3637 /* Everything else cannot happen. */
3642 else if (addend != const0_rtx)
3644 /* Otherwise, compute the sum. */
3646 rtx base = legitimize_pic_address (addr, reg);
3647 new_rtx = legitimize_pic_address (addend,
3648 base == reg ? NULL_RTX : reg);
3649 if (GET_CODE (new_rtx) == CONST_INT)
3650 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3653 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3655 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3656 new_rtx = XEXP (new_rtx, 1);
3658 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3661 if (GET_CODE (new_rtx) == CONST)
3662 new_rtx = XEXP (new_rtx, 0);
3663 new_rtx = force_operand (new_rtx, 0);
3669 /* Load the thread pointer into a register. */
3672 s390_get_thread_pointer (void)
3674 rtx tp = gen_reg_rtx (Pmode);
3676 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3677 mark_reg_pointer (tp, BITS_PER_WORD);
3682 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
3683 in s390_tls_symbol which always refers to __tls_get_offset.
3684 The returned offset is written to RESULT_REG and a USE rtx is
3685 generated for TLS_CALL. */
3687 static GTY(()) rtx s390_tls_symbol;
3690 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3695 emit_insn (s390_load_got ());
3697 if (!s390_tls_symbol)
3698 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3700 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3701 gen_rtx_REG (Pmode, RETURN_REGNUM));
3703 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3704 RTL_CONST_CALL_P (insn) = 1;
3707 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3708 this (thread-local) address. REG may be used as temporary. */
3711 legitimize_tls_address (rtx addr, rtx reg)
3713 rtx new_rtx, tls_call, temp, base, r2, insn;
3715 if (GET_CODE (addr) == SYMBOL_REF)
3716 switch (tls_symbolic_operand (addr))
3718 case TLS_MODEL_GLOBAL_DYNAMIC:
3720 r2 = gen_rtx_REG (Pmode, 2);
3721 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3722 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3723 new_rtx = force_const_mem (Pmode, new_rtx);
3724 emit_move_insn (r2, new_rtx);
3725 s390_emit_tls_call_insn (r2, tls_call);
3726 insn = get_insns ();
3729 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3730 temp = gen_reg_rtx (Pmode);
3731 emit_libcall_block (insn, temp, r2, new_rtx);
3733 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3736 s390_load_address (reg, new_rtx);
3741 case TLS_MODEL_LOCAL_DYNAMIC:
3743 r2 = gen_rtx_REG (Pmode, 2);
3744 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3745 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3746 new_rtx = force_const_mem (Pmode, new_rtx);
3747 emit_move_insn (r2, new_rtx);
3748 s390_emit_tls_call_insn (r2, tls_call);
3749 insn = get_insns ();
3752 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3753 temp = gen_reg_rtx (Pmode);
3754 emit_libcall_block (insn, temp, r2, new_rtx);
3756 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3757 base = gen_reg_rtx (Pmode);
3758 s390_load_address (base, new_rtx);
3760 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3761 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3762 new_rtx = force_const_mem (Pmode, new_rtx);
3763 temp = gen_reg_rtx (Pmode);
3764 emit_move_insn (temp, new_rtx);
3766 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3769 s390_load_address (reg, new_rtx);
3774 case TLS_MODEL_INITIAL_EXEC:
3777 /* Assume GOT offset < 4k. This is handled the same way
3778 in both 31- and 64-bit code. */
3780 if (reload_in_progress || reload_completed)
3781 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3784 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3785 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3786 new_rtx = gen_const_mem (Pmode, new_rtx);
3787 temp = gen_reg_rtx (Pmode);
3788 emit_move_insn (temp, new_rtx);
3790 else if (TARGET_CPU_ZARCH)
3792 /* If the GOT offset might be >= 4k, we determine the position
3793 of the GOT entry via a PC-relative LARL. */
3795 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3796 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3797 temp = gen_reg_rtx (Pmode);
3798 emit_move_insn (temp, new_rtx);
3800 new_rtx = gen_const_mem (Pmode, temp);
3801 temp = gen_reg_rtx (Pmode);
3802 emit_move_insn (temp, new_rtx);
3806 /* If the GOT offset might be >= 4k, we have to load it
3807 from the literal pool. */
3809 if (reload_in_progress || reload_completed)
3810 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3812 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3813 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3814 new_rtx = force_const_mem (Pmode, new_rtx);
3815 temp = gen_reg_rtx (Pmode);
3816 emit_move_insn (temp, new_rtx);
3818 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3819 new_rtx = gen_const_mem (Pmode, new_rtx);
3821 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3822 temp = gen_reg_rtx (Pmode);
3823 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3827 /* In position-dependent code, load the absolute address of
3828 the GOT entry from the literal pool. */
3830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3831 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3832 new_rtx = force_const_mem (Pmode, new_rtx);
3833 temp = gen_reg_rtx (Pmode);
3834 emit_move_insn (temp, new_rtx);
3837 new_rtx = gen_const_mem (Pmode, new_rtx);
3838 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3839 temp = gen_reg_rtx (Pmode);
3840 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3843 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3846 s390_load_address (reg, new_rtx);
3851 case TLS_MODEL_LOCAL_EXEC:
3852 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3853 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3854 new_rtx = force_const_mem (Pmode, new_rtx);
3855 temp = gen_reg_rtx (Pmode);
3856 emit_move_insn (temp, new_rtx);
3858 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3861 s390_load_address (reg, new_rtx);
3870 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3872 switch (XINT (XEXP (addr, 0), 1))
3874 case UNSPEC_INDNTPOFF:
3875 gcc_assert (TARGET_CPU_ZARCH);
3884 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3885 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3887 new_rtx = XEXP (XEXP (addr, 0), 0);
3888 if (GET_CODE (new_rtx) != SYMBOL_REF)
3889 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3891 new_rtx = legitimize_tls_address (new_rtx, reg);
3892 new_rtx = plus_constant (Pmode, new_rtx,
3893 INTVAL (XEXP (XEXP (addr, 0), 1)));
3894 new_rtx = force_operand (new_rtx, 0);
3898 gcc_unreachable (); /* for now ... */
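/* Illustrative sketch, not part of the original source: hypothetical
   source-level accesses that reach the TLS cases above.  Which model is
   chosen depends on flag_pic, symbol visibility and -ftls-model, so the
   comments are only indicative.  */

__thread int tls_local_example;          /* candidate for local-exec */
extern __thread int tls_extern_example;  /* may need IE/GD via the GOT */

static int
tls_use_example (void)
{
  /* Each access below is legitimized by legitimize_tls_address into
     one of the UNSPEC_* sequences emitted above.  */
  return tls_local_example + tls_extern_example;
}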
3903 /* Emit insns making the address in operands[1] valid for a standard
3904 move to operands[0]. operands[1] is replaced by an address which
3905 should be used instead of the former RTX to emit the move pattern. */
3909 emit_symbolic_move (rtx *operands)
3911 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3913 if (GET_CODE (operands[0]) == MEM)
3914 operands[1] = force_reg (Pmode, operands[1]);
3915 else if (TLS_SYMBOLIC_CONST (operands[1]))
3916 operands[1] = legitimize_tls_address (operands[1], temp);
3918 operands[1] = legitimize_pic_address (operands[1], temp);
3921 /* Try machine-dependent ways of modifying an illegitimate address X
3922 to be legitimate. If we find one, return the new, valid address.
3924 OLDX is the address as it was before break_out_memory_refs was called.
3925 In some cases it is useful to look at this to decide what needs to be done.
3927 MODE is the mode of the operand pointed to by X.
3929 When -fpic is used, special handling is needed for symbolic references.
3930 See comments by legitimize_pic_address for details. */
3933 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3934 enum machine_mode mode ATTRIBUTE_UNUSED)
3936 rtx constant_term = const0_rtx;
3938 if (TLS_SYMBOLIC_CONST (x))
3940 x = legitimize_tls_address (x, 0);
3942 if (s390_legitimate_address_p (mode, x, FALSE))
3945 else if (GET_CODE (x) == PLUS
3946 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3947 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3953 if (SYMBOLIC_CONST (x)
3954 || (GET_CODE (x) == PLUS
3955 && (SYMBOLIC_CONST (XEXP (x, 0))
3956 || SYMBOLIC_CONST (XEXP (x, 1)))))
3957 x = legitimize_pic_address (x, 0);
3959 if (s390_legitimate_address_p (mode, x, FALSE))
3963 x = eliminate_constant_term (x, &constant_term);
3965 /* Optimize loading of large displacements by splitting them
3966 into the multiple of 4K and the rest; this allows the
3967 former to be CSE'd if possible.
3969 Don't do this if the displacement is added to a register
3970 pointing into the stack frame, as the offsets will
3971 change later anyway. */
3973 if (GET_CODE (constant_term) == CONST_INT
3974 && !TARGET_LONG_DISPLACEMENT
3975 && !DISP_IN_RANGE (INTVAL (constant_term))
3976 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
3978 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
3979 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
3981 rtx temp = gen_reg_rtx (Pmode);
3982 rtx val = force_operand (GEN_INT (upper), temp);
3984 emit_move_insn (temp, val);
3986 x = gen_rtx_PLUS (Pmode, x, temp);
3987 constant_term = GEN_INT (lower);
3990 if (GET_CODE (x) == PLUS)
3992 if (GET_CODE (XEXP (x, 0)) == REG)
3994 rtx temp = gen_reg_rtx (Pmode);
3995 rtx val = force_operand (XEXP (x, 1), temp);
3997 emit_move_insn (temp, val);
3999 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4002 else if (GET_CODE (XEXP (x, 1)) == REG)
4004 rtx temp = gen_reg_rtx (Pmode);
4005 rtx val = force_operand (XEXP (x, 0), temp);
4007 emit_move_insn (temp, val);
4009 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4013 if (constant_term != const0_rtx)
4014 x = gen_rtx_PLUS (Pmode, x, constant_term);
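/* Illustrative sketch, not part of the original source: the 4K split
   performed above on a hypothetical displacement.  The upper part is a
   multiple of 4K (and therefore CSE-able across nearby addresses); the
   lower part fits the 12-bit displacement field.  */

static void
disp_split_example (void)
{
  long long disp = 0x12345;        /* stand-in for INTVAL (constant_term) */
  long long lower = disp & 0xfff;  /* 0x345, stays in the displacement */
  long long upper = disp ^ lower;  /* 0x12000, moved into a register */
  (void) lower; (void) upper;
}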
4019 /* Try a machine-dependent way of reloading an illegitimate address AD
4020 operand. If we find one, push the reload and return the new address.
4022 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4023 and TYPE is the reload type of the current reload. */
4026 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4027 int opnum, int type)
4029 if (!optimize || TARGET_LONG_DISPLACEMENT)
4032 if (GET_CODE (ad) == PLUS)
4034 rtx tem = simplify_binary_operation (PLUS, Pmode,
4035 XEXP (ad, 0), XEXP (ad, 1));
4040 if (GET_CODE (ad) == PLUS
4041 && GET_CODE (XEXP (ad, 0)) == REG
4042 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4043 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4045 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4046 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4047 rtx cst, tem, new_rtx;
4049 cst = GEN_INT (upper);
4050 if (!legitimate_reload_constant_p (cst))
4051 cst = force_const_mem (Pmode, cst);
4053 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4054 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4056 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4057 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4058 opnum, (enum reload_type) type);
4065 /* Emit code to move LEN bytes from SRC to DST. */
4068 s390_expand_movmem (rtx dst, rtx src, rtx len)
4070 /* When tuning for z10 or higher we rely on the Glibc functions to
4071 do the right thing. Only for constant lengths below 64k do we
4072 generate inline code. */
4073 if (s390_tune >= PROCESSOR_2097_Z10
4074 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4077 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4079 if (INTVAL (len) > 0)
4080 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4083 else if (TARGET_MVCLE)
4085 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4090 rtx dst_addr, src_addr, count, blocks, temp;
4091 rtx_code_label *loop_start_label = gen_label_rtx ();
4092 rtx_code_label *loop_end_label = gen_label_rtx ();
4093 rtx_code_label *end_label = gen_label_rtx ();
4094 enum machine_mode mode;
4096 mode = GET_MODE (len);
4097 if (mode == VOIDmode)
4100 dst_addr = gen_reg_rtx (Pmode);
4101 src_addr = gen_reg_rtx (Pmode);
4102 count = gen_reg_rtx (mode);
4103 blocks = gen_reg_rtx (mode);
4105 convert_move (count, len, 1);
4106 emit_cmp_and_jump_insns (count, const0_rtx,
4107 EQ, NULL_RTX, mode, 1, end_label);
4109 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4110 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4111 dst = change_address (dst, VOIDmode, dst_addr);
4112 src = change_address (src, VOIDmode, src_addr);
4114 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4117 emit_move_insn (count, temp);
4119 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4122 emit_move_insn (blocks, temp);
4124 emit_cmp_and_jump_insns (blocks, const0_rtx,
4125 EQ, NULL_RTX, mode, 1, loop_end_label);
4127 emit_label (loop_start_label);
4130 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4134 /* Issue a read prefetch for the +3 cache line. */
4135 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4136 const0_rtx, const0_rtx);
4137 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4138 emit_insn (prefetch);
4140 /* Issue a write prefetch for the +3 cache line. */
4141 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4142 const1_rtx, const0_rtx);
4143 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4144 emit_insn (prefetch);
4147 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4148 s390_load_address (dst_addr,
4149 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4150 s390_load_address (src_addr,
4151 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4153 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4156 emit_move_insn (blocks, temp);
4158 emit_cmp_and_jump_insns (blocks, const0_rtx,
4159 EQ, NULL_RTX, mode, 1, loop_end_label);
4161 emit_jump (loop_start_label);
4162 emit_label (loop_end_label);
4164 emit_insn (gen_movmem_short (dst, src,
4165 convert_to_mode (Pmode, count, 1)));
4166 emit_label (end_label);
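/* Illustrative sketch, not part of the original source: the shape of
   the copy loop emitted above.  COUNT is LEN - 1 because the MVC length
   field encodes length minus one; BLOCKS counts full 256-byte MVCs and
   the remainder is handled by a final short MVC.  */

static void
movmem_loop_example (unsigned char *dst, const unsigned char *src,
                     unsigned long len)
{
  unsigned long i, count, blocks;

  if (len == 0)
    return;
  count = len - 1;                 /* MVC length code */
  blocks = count >> 8;             /* number of full 256-byte chunks */
  while (blocks-- > 0)
    {
      for (i = 0; i < 256; i++)    /* one gen_movmem_short (.., 255) */
        dst[i] = src[i];
      dst += 256;                  /* the two s390_load_address calls */
      src += 256;
    }
  for (i = 0; i <= (count & 0xff); i++)  /* final partial chunk */
    dst[i] = src[i];
}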
4171 /* Emit code to set LEN bytes at DST to VAL.
4172 Make use of clrmem if VAL is zero. */
4175 s390_expand_setmem (rtx dst, rtx len, rtx val)
4177 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4180 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4182 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4184 if (val == const0_rtx && INTVAL (len) <= 256)
4185 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4188 /* Initialize memory by storing the first byte. */
4189 emit_move_insn (adjust_address (dst, QImode, 0), val);
4191 if (INTVAL (len) > 1)
4193 /* Initiate 1 byte overlap move.
4194 The first byte of DST is propagated through DSTP1.
4195 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4196 DST is set to size 1 so that the rest of the memory location
4197 does not count as a source operand. */
4198 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4199 set_mem_size (dst, 1);
4201 emit_insn (gen_movmem_short (dstp1, dst,
4202 GEN_INT (INTVAL (len) - 2)));
4207 else if (TARGET_MVCLE)
4209 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4210 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4215 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4216 rtx_code_label *loop_start_label = gen_label_rtx ();
4217 rtx_code_label *loop_end_label = gen_label_rtx ();
4218 rtx_code_label *end_label = gen_label_rtx ();
4219 enum machine_mode mode;
4221 mode = GET_MODE (len);
4222 if (mode == VOIDmode)
4225 dst_addr = gen_reg_rtx (Pmode);
4226 count = gen_reg_rtx (mode);
4227 blocks = gen_reg_rtx (mode);
4229 convert_move (count, len, 1);
4230 emit_cmp_and_jump_insns (count, const0_rtx,
4231 EQ, NULL_RTX, mode, 1, end_label);
4233 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4234 dst = change_address (dst, VOIDmode, dst_addr);
4236 if (val == const0_rtx)
4237 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4241 dstp1 = adjust_address (dst, VOIDmode, 1);
4242 set_mem_size (dst, 1);
4244 /* Initialize memory by storing the first byte. */
4245 emit_move_insn (adjust_address (dst, QImode, 0), val);
4247 /* If count is 1 we are done. */
4248 emit_cmp_and_jump_insns (count, const1_rtx,
4249 EQ, NULL_RTX, mode, 1, end_label);
4251 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4255 emit_move_insn (count, temp);
4257 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4260 emit_move_insn (blocks, temp);
4262 emit_cmp_and_jump_insns (blocks, const0_rtx,
4263 EQ, NULL_RTX, mode, 1, loop_end_label);
4265 emit_label (loop_start_label);
4268 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4270 /* Issue a write prefetch for the +4 cache line. */
4271 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4273 const1_rtx, const0_rtx);
4274 emit_insn (prefetch);
4275 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4278 if (val == const0_rtx)
4279 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4281 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4282 s390_load_address (dst_addr,
4283 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4285 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4288 emit_move_insn (blocks, temp);
4290 emit_cmp_and_jump_insns (blocks, const0_rtx,
4291 EQ, NULL_RTX, mode, 1, loop_end_label);
4293 emit_jump (loop_start_label);
4294 emit_label (loop_end_label);
4296 if (val == const0_rtx)
4297 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4299 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4300 emit_label (end_label);
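/* Illustrative sketch, not part of the original source: why the
   one-byte overlapping move above implements memset.  MVC copies left
   to right one byte at a time, so copying DST to DST+1 propagates the
   byte stored first through the whole destination.  */

static void
setmem_overlap_example (unsigned char *dst, unsigned char val,
                        unsigned long len)
{
  unsigned long i;

  if (len == 0)
    return;
  dst[0] = val;               /* initialize memory with the first byte */
  for (i = 1; i < len; i++)   /* DST+1 = DST with length LEN - 1 */
    dst[i] = dst[i - 1];      /* byte-by-byte, as MVC does */
}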
4304 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4305 and return the result in TARGET. */
4308 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4310 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4313 /* When tuning for z10 or higher we rely on the Glibc functions to
4314 do the right thing. Only for constant lengths below 64k do we
4315 generate inline code. */
4316 if (s390_tune >= PROCESSOR_2097_Z10
4317 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4320 /* As the result of CMPINT is inverted compared to what we need,
4321 we have to swap the operands. */
4322 tmp = op0; op0 = op1; op1 = tmp;
4324 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4326 if (INTVAL (len) > 0)
4328 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4329 emit_insn (gen_cmpint (target, ccreg));
4332 emit_move_insn (target, const0_rtx);
4334 else if (TARGET_MVCLE)
4336 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4337 emit_insn (gen_cmpint (target, ccreg));
4341 rtx addr0, addr1, count, blocks, temp;
4342 rtx_code_label *loop_start_label = gen_label_rtx ();
4343 rtx_code_label *loop_end_label = gen_label_rtx ();
4344 rtx_code_label *end_label = gen_label_rtx ();
4345 enum machine_mode mode;
4347 mode = GET_MODE (len);
4348 if (mode == VOIDmode)
4351 addr0 = gen_reg_rtx (Pmode);
4352 addr1 = gen_reg_rtx (Pmode);
4353 count = gen_reg_rtx (mode);
4354 blocks = gen_reg_rtx (mode);
4356 convert_move (count, len, 1);
4357 emit_cmp_and_jump_insns (count, const0_rtx,
4358 EQ, NULL_RTX, mode, 1, end_label);
4360 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4361 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4362 op0 = change_address (op0, VOIDmode, addr0);
4363 op1 = change_address (op1, VOIDmode, addr1);
4365 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4368 emit_move_insn (count, temp);
4370 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4373 emit_move_insn (blocks, temp);
4375 emit_cmp_and_jump_insns (blocks, const0_rtx,
4376 EQ, NULL_RTX, mode, 1, loop_end_label);
4378 emit_label (loop_start_label);
4381 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4385 /* Issue a read prefetch for the +2 cache line of operand 1. */
4386 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4387 const0_rtx, const0_rtx);
4388 emit_insn (prefetch);
4389 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4391 /* Issue a read prefetch for the +2 cache line of operand 2. */
4392 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4393 const0_rtx, const0_rtx);
4394 emit_insn (prefetch);
4395 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4398 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4399 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4400 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4401 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4402 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4403 emit_jump_insn (temp);
4405 s390_load_address (addr0,
4406 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4407 s390_load_address (addr1,
4408 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4410 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4413 emit_move_insn (blocks, temp);
4415 emit_cmp_and_jump_insns (blocks, const0_rtx,
4416 EQ, NULL_RTX, mode, 1, loop_end_label);
4418 emit_jump (loop_start_label);
4419 emit_label (loop_end_label);
4421 emit_insn (gen_cmpmem_short (op0, op1,
4422 convert_to_mode (Pmode, count, 1)));
4423 emit_label (end_label);
4425 emit_insn (gen_cmpint (target, ccreg));
4431 /* Expand conditional increment or decrement using alc/slb instructions.
4432 Should generate code setting DST to either SRC or SRC + INCREMENT,
4433 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4434 Returns true if successful, false otherwise.
4436 That makes it possible to implement some if-constructs without jumps e.g.:
4437 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4438 unsigned int a, b, c;
4439 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4440 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4441 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4442 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4444 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4445 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4446 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4447 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4448 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
4451 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4452 rtx dst, rtx src, rtx increment)
4454 enum machine_mode cmp_mode;
4455 enum machine_mode cc_mode;
4461 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4462 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4464 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4465 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4470 /* Try ADD LOGICAL WITH CARRY. */
4471 if (increment == const1_rtx)
4473 /* Determine CC mode to use. */
4474 if (cmp_code == EQ || cmp_code == NE)
4476 if (cmp_op1 != const0_rtx)
4478 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4479 NULL_RTX, 0, OPTAB_WIDEN);
4480 cmp_op1 = const0_rtx;
4483 cmp_code = cmp_code == EQ ? LEU : GTU;
4486 if (cmp_code == LTU || cmp_code == LEU)
4491 cmp_code = swap_condition (cmp_code);
4508 /* Emit comparison instruction pattern. */
4509 if (!register_operand (cmp_op0, cmp_mode))
4510 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4512 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4513 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4514 /* We use insn_invalid_p here to add clobbers if required. */
4515 ret = insn_invalid_p (emit_insn (insn), false);
4518 /* Emit ALC instruction pattern. */
4519 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4520 gen_rtx_REG (cc_mode, CC_REGNUM),
4523 if (src != const0_rtx)
4525 if (!register_operand (src, GET_MODE (dst)))
4526 src = force_reg (GET_MODE (dst), src);
4528 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4529 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4532 p = rtvec_alloc (2);
4534 gen_rtx_SET (VOIDmode, dst, op_res);
4536 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4537 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4542 /* Try SUBTRACT LOGICAL WITH BORROW. */
4543 if (increment == constm1_rtx)
4545 /* Determine CC mode to use. */
4546 if (cmp_code == EQ || cmp_code == NE)
4548 if (cmp_op1 != const0_rtx)
4550 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4551 NULL_RTX, 0, OPTAB_WIDEN);
4552 cmp_op1 = const0_rtx;
4555 cmp_code = cmp_code == EQ ? LEU : GTU;
4558 if (cmp_code == GTU || cmp_code == GEU)
4563 cmp_code = swap_condition (cmp_code);
4580 /* Emit comparison instruction pattern. */
4581 if (!register_operand (cmp_op0, cmp_mode))
4582 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4584 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4585 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4586 /* We use insn_invalid_p here to add clobbers if required. */
4587 ret = insn_invalid_p (emit_insn (insn), false);
4590 /* Emit SLB instruction pattern. */
4591 if (!register_operand (src, GET_MODE (dst)))
4592 src = force_reg (GET_MODE (dst), src);
4594 op_res = gen_rtx_MINUS (GET_MODE (dst),
4595 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4596 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4597 gen_rtx_REG (cc_mode, CC_REGNUM),
4599 p = rtvec_alloc (2);
4601 gen_rtx_SET (VOIDmode, dst, op_res);
4603 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4604 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
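/* Illustrative sketch, not part of the original source: the C-level
   idiom that the ALC/SLB expansion above makes branch-free.  The
   comparison leaves carry/borrow in the CC and ALC/SLB folds it into
   the arithmetic; a portable rendering of the same computation:  */

static unsigned int
addcc_example (unsigned int a, unsigned int b, unsigned int c)
{
  /* if (a < b) c++;  without a branch: the comparison result is
     exactly the carry that ALC would add.  */
  return c + (a < b);
}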
4612 /* Expand code for the insv template. Return true if successful. */
4615 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4617 int bitsize = INTVAL (op1);
4618 int bitpos = INTVAL (op2);
4619 enum machine_mode mode = GET_MODE (dest);
4620 enum machine_mode smode;
4621 int smode_bsize, mode_bsize;
4624 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
4627 /* Generate INSERT IMMEDIATE (IILL et al). */
4628 /* (set (ze (reg)) (const_int)). */
4630 && register_operand (dest, word_mode)
4631 && (bitpos % 16) == 0
4632 && (bitsize % 16) == 0
4633 && const_int_operand (src, VOIDmode))
4635 HOST_WIDE_INT val = INTVAL (src);
4636 int regpos = bitpos + bitsize;
4638 while (regpos > bitpos)
4640 enum machine_mode putmode;
4643 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4648 putsize = GET_MODE_BITSIZE (putmode);
4650 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4653 gen_int_mode (val, putmode));
4656 gcc_assert (regpos == bitpos);
4660 smode = smallest_mode_for_size (bitsize, MODE_INT);
4661 smode_bsize = GET_MODE_BITSIZE (smode);
4662 mode_bsize = GET_MODE_BITSIZE (mode);
4664 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4666 && (bitsize % BITS_PER_UNIT) == 0
4668 && (register_operand (src, word_mode)
4669 || const_int_operand (src, VOIDmode)))
4671 /* Emit standard pattern if possible. */
4672 if (smode_bsize == bitsize)
4674 emit_move_insn (adjust_address (dest, smode, 0),
4675 gen_lowpart (smode, src));
4679 /* (set (ze (mem)) (const_int)). */
4680 else if (const_int_operand (src, VOIDmode))
4682 int size = bitsize / BITS_PER_UNIT;
4683 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4685 UNITS_PER_WORD - size);
4687 dest = adjust_address (dest, BLKmode, 0);
4688 set_mem_size (dest, size);
4689 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4693 /* (set (ze (mem)) (reg)). */
4694 else if (register_operand (src, word_mode))
4697 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4701 /* Emit st,stcmh sequence. */
4702 int stcmh_width = bitsize - 32;
4703 int size = stcmh_width / BITS_PER_UNIT;
4705 emit_move_insn (adjust_address (dest, SImode, size),
4706 gen_lowpart (SImode, src));
4707 set_mem_size (dest, size);
4708 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4709 GEN_INT (stcmh_width),
4711 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4717 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4718 if ((bitpos % BITS_PER_UNIT) == 0
4719 && (bitsize % BITS_PER_UNIT) == 0
4720 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4722 && (mode == DImode || mode == SImode)
4723 && register_operand (dest, mode))
4725 /* Emit a strict_low_part pattern if possible. */
4726 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4728 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4729 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4730 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4731 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4735 /* ??? There are more powerful versions of ICM that are not
4736 completely represented in the md file. */
4739 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4740 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4742 enum machine_mode mode_s = GET_MODE (src);
4744 if (mode_s == VOIDmode)
4746 /* Assume const_int etc. are already in the proper mode. */
4747 src = force_reg (mode, src);
4749 else if (mode_s != mode)
4751 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4752 src = force_reg (mode_s, src);
4753 src = gen_lowpart (mode, src);
4756 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
4757 op = gen_rtx_SET (VOIDmode, op, src);
4761 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4762 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4772 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4773 register that holds VAL of mode MODE shifted by COUNT bits. */
4776 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4778 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4779 NULL_RTX, 1, OPTAB_DIRECT);
4780 return expand_simple_binop (SImode, ASHIFT, val, count,
4781 NULL_RTX, 1, OPTAB_DIRECT);
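/* Illustrative example (assumed values): for a QImode VAL of 0x7f and a
   COUNT of 16, the two binops above compute
     (0x7f & 0xff) << 16 == 0x007f0000,
   i.e. the field placed at the byte position within the containing
   SImode word that the CS-based loops below expect.  */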
4784 /* Structure to hold the initial parameters for a compare_and_swap operation
4785 in HImode and QImode. */
4787 struct alignment_context
4789 rtx memsi; /* SI aligned memory location. */
4790 rtx shift; /* Bit offset with regard to lsb. */
4791 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4792 rtx modemaski; /* ~modemask */
4793 bool aligned; /* True if memory is aligned, false otherwise. */
4796 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4797 the structure AC for transparent simplification, if the memory alignment is known
4798 to be at least 32 bits. MEM is the memory location for the actual operation
4799 and MODE its mode. */
4802 init_alignment_context (struct alignment_context *ac, rtx mem,
4803 enum machine_mode mode)
4805 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4806 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4809 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4812 /* Alignment is unknown. */
4813 rtx byteoffset, addr, align;
4815 /* Force the address into a register. */
4816 addr = force_reg (Pmode, XEXP (mem, 0));
4818 /* Align it to SImode. */
4819 align = expand_simple_binop (Pmode, AND, addr,
4820 GEN_INT (-GET_MODE_SIZE (SImode)),
4821 NULL_RTX, 1, OPTAB_DIRECT);
4823 ac->memsi = gen_rtx_MEM (SImode, align);
4824 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4825 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4826 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4828 /* Calculate shiftcount. */
4829 byteoffset = expand_simple_binop (Pmode, AND, addr,
4830 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4831 NULL_RTX, 1, OPTAB_DIRECT);
4832 /* As we already have some offset, evaluate the remaining distance. */
4833 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4834 NULL_RTX, 1, OPTAB_DIRECT);
4837 /* Shift is the byte count, but we need the bit count. */
4838 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4839 NULL_RTX, 1, OPTAB_DIRECT);
4841 /* Calculate masks. */
4842 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4843 GEN_INT (GET_MODE_MASK (mode)),
4844 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4845 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
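/* Worked example of the unaligned path (illustrative): for a HImode MEM
   whose address satisfies (addr & 3) == 1, the initial shift is
   4 - 2 == 2 bytes and byteoffset is 1, so ac->shift ends up as
   (2 - 1) * 8 == 8 bits, ac->modemask as 0xffff << 8 == 0x00ffff00,
   and ac->modemaski as its complement 0xff0000ff.  */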
4849 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4850 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4851 perform the merge in SEQ2. */
4854 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4855 enum machine_mode mode, rtx val, rtx ins)
4862 tmp = copy_to_mode_reg (SImode, val);
4863 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4867 *seq2 = get_insns ();
4874 /* Failed to use insv. Generate a two part shift and mask. */
4876 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4877 *seq1 = get_insns ();
4881 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4882 *seq2 = get_insns ();
4888 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4889 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4890 value to set if CMP == MEM. */
4893 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4894 rtx cmp, rtx new_rtx, bool is_weak)
4896 struct alignment_context ac;
4897 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4898 rtx res = gen_reg_rtx (SImode);
4899 rtx_code_label *csloop = NULL, *csend = NULL;
4901 gcc_assert (MEM_P (mem));
4903 init_alignment_context (&ac, mem, mode);
4905 /* Load full word. Subsequent loads are performed by CS. */
4906 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4907 NULL_RTX, 1, OPTAB_DIRECT);
4909 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4910 possible, we try to use insv to make this happen efficiently. If
4911 that fails we'll generate code both inside and outside the loop. */
4912 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4913 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4920 /* Start CS loop. */
4923 /* Begin assuming success. */
4924 emit_move_insn (btarget, const1_rtx);
4926 csloop = gen_label_rtx ();
4927 csend = gen_label_rtx ();
4928 emit_label (csloop);
4931 /* val = "<mem>00..0<mem>"
4932 * cmp = "00..0<cmp>00..0"
4933 * new = "00..0<new>00..0"
4939 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4941 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4946 /* Jump to end if we're done (likely?). */
4947 s390_emit_jump (csend, cc);
4949 /* Check for changes outside the mode, and loop internally if so.
4950 Arrange the moves so that the compare is adjacent to the
4951 branch so that we can generate CRJ. */
4952 tmp = copy_to_reg (val);
4953 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4955 cc = s390_emit_compare (NE, val, tmp);
4956 s390_emit_jump (csloop, cc);
4959 emit_move_insn (btarget, const0_rtx);
4963 /* Return the correct part of the bitfield. */
4964 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4965 NULL_RTX, 1, OPTAB_DIRECT), 1);
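/* Illustrative: if the QImode field sits in bits 8..15 of RES
   (ac.shift == 8), the LSHIFTRT above moves it down to bits 0..7
   before convert_move truncates it into the narrow VTARGET.  */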
4968 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4969 and VAL the value to play with. If AFTER is true then store the value
4970 MEM holds after the operation, if AFTER is false then store the value MEM
4971 holds before the operation. If TARGET is zero then discard that value, else
4972 store it to TARGET. */
4975 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
4976 rtx target, rtx mem, rtx val, bool after)
4978 struct alignment_context ac;
4980 rtx new_rtx = gen_reg_rtx (SImode);
4981 rtx orig = gen_reg_rtx (SImode);
4982 rtx_code_label *csloop = gen_label_rtx ();
4984 gcc_assert (!target || register_operand (target, VOIDmode));
4985 gcc_assert (MEM_P (mem));
4987 init_alignment_context (&ac, mem, mode);
4989 /* Shift val to the correct bit positions.
4990 Preserve "icm", but prevent "ex icm". */
4991 if (!(ac.aligned && code == SET && MEM_P (val)))
4992 val = s390_expand_mask_and_shift (val, mode, ac.shift);
4994 /* Further preparation insns. */
4995 if (code == PLUS || code == MINUS)
4996 emit_move_insn (orig, val);
4997 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
4998 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
4999 NULL_RTX, 1, OPTAB_DIRECT);
5001 /* Load full word. Subsequent loads are performed by CS. */
5002 cmp = force_reg (SImode, ac.memsi);
5004 /* Start CS loop. */
5005 emit_label (csloop);
5006 emit_move_insn (new_rtx, cmp);
5008 /* Patch new with val at the correct position. */
5013 val = expand_simple_binop (SImode, code, new_rtx, orig,
5014 NULL_RTX, 1, OPTAB_DIRECT);
5015 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5016 NULL_RTX, 1, OPTAB_DIRECT);
5019 if (ac.aligned && MEM_P (val))
5020 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5024 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5025 NULL_RTX, 1, OPTAB_DIRECT);
5026 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5027 NULL_RTX, 1, OPTAB_DIRECT);
5033 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5034 NULL_RTX, 1, OPTAB_DIRECT);
5036 case MULT: /* NAND */
5037 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5038 NULL_RTX, 1, OPTAB_DIRECT);
5039 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5040 NULL_RTX, 1, OPTAB_DIRECT);
5046 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5047 ac.memsi, cmp, new_rtx));
5049 /* Return the correct part of the bitfield. */
5051 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5052 after ? new_rtx : cmp, ac.shift,
5053 NULL_RTX, 1, OPTAB_DIRECT), 1);
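/* Sketch of the MULT (NAND) arm above, assuming a QImode field with
   modemask 0x000000ff: VAL was pre-XORed with modemaski, so the AND
   leaves bits outside the field untouched, and the final XOR with
   ac.modemask flips only the field, yielding ~(new & val) there --
   a NAND confined to the accessed bytes.  */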
5056 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5057 We need to emit DTP-relative relocations. */
5059 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5062 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5067 fputs ("\t.long\t", file);
5070 fputs ("\t.quad\t", file);
5075 output_addr_const (file, x);
5076 fputs ("@DTPOFF", file);
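/* E.g. for SIZE == 8 and X == (symbol_ref "foo") this emits

     .quad  foo@DTPOFF

   and the .long variant for SIZE == 4.  */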
5079 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5080 /* Implement TARGET_MANGLE_TYPE. */
5083 s390_mangle_type (const_tree type)
5085 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5086 && TARGET_LONG_DOUBLE_128)
5089 /* For all other types, use normal C++ mangling. */
5094 /* In the name of slightly smaller debug output, and to cater to
5095 general assembler lossage, recognize various UNSPEC sequences
5096 and turn them back into a direct symbol reference. */
5099 s390_delegitimize_address (rtx orig_x)
5103 orig_x = delegitimize_mem_from_attrs (orig_x);
5106 /* Extract the symbol ref from:
5107 (plus:SI (reg:SI 12 %r12)
5108 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5109 UNSPEC_GOTOFF/PLTOFF)))
5111 (plus:SI (reg:SI 12 %r12)
5112 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5113 UNSPEC_GOTOFF/PLTOFF)
5114 (const_int 4 [0x4])))) */
5115 if (GET_CODE (x) == PLUS
5116 && REG_P (XEXP (x, 0))
5117 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5118 && GET_CODE (XEXP (x, 1)) == CONST)
5120 HOST_WIDE_INT offset = 0;
5122 /* The const operand. */
5123 y = XEXP (XEXP (x, 1), 0);
5125 if (GET_CODE (y) == PLUS
5126 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5128 offset = INTVAL (XEXP (y, 1));
5132 if (GET_CODE (y) == UNSPEC
5133 && (XINT (y, 1) == UNSPEC_GOTOFF
5134 || XINT (y, 1) == UNSPEC_PLTOFF))
5135 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5138 if (GET_CODE (x) != MEM)
5142 if (GET_CODE (x) == PLUS
5143 && GET_CODE (XEXP (x, 1)) == CONST
5144 && GET_CODE (XEXP (x, 0)) == REG
5145 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5147 y = XEXP (XEXP (x, 1), 0);
5148 if (GET_CODE (y) == UNSPEC
5149 && XINT (y, 1) == UNSPEC_GOT)
5150 y = XVECEXP (y, 0, 0);
5154 else if (GET_CODE (x) == CONST)
5156 /* Extract the symbol ref from:
5157 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5158 UNSPEC_PLT/GOTENT))) */
5161 if (GET_CODE (y) == UNSPEC
5162 && (XINT (y, 1) == UNSPEC_GOTENT
5163 || XINT (y, 1) == UNSPEC_PLT))
5164 y = XVECEXP (y, 0, 0);
5171 if (GET_MODE (orig_x) != Pmode)
5173 if (GET_MODE (orig_x) == BLKmode)
5175 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5182 /* Output operand OP to stdio stream FILE.
5183 OP is an address (register + offset) which is not used to address data;
5184 instead the rightmost bits are interpreted as the value. */
5187 print_shift_count_operand (FILE *file, rtx op)
5189 HOST_WIDE_INT offset;
5192 /* Extract base register and offset. */
5193 if (!s390_decompose_shift_count (op, &base, &offset))
5199 gcc_assert (GET_CODE (base) == REG);
5200 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5201 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5204 /* Offsets are restricted to twelve bits. */
5205 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5207 fprintf (file, "(%s)", reg_names[REGNO (base)]);
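/* Illustrative: for OP == (plus (reg %r3) (const_int 4100)) this prints
   "4(%r3)", since 4100 & 4095 == 4 -- matching the 12-bit displacement
   field of the shift instructions.  */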
5210 /* Returns -1 if the function should not be made hotpatchable. Otherwise it
5211 returns a number >= 0 that is the desired size of the hotpatch trampoline in halfwords.
5214 static int s390_function_num_hotpatch_trampoline_halfwords (tree decl,
5219 if (DECL_DECLARED_INLINE_P (decl)
5220 || DECL_ARTIFICIAL (decl)
5221 || MAIN_NAME_P (DECL_NAME (decl)))
5223 /* - Explicitly inlined functions cannot be hotpatched.
5224 - Artificial functions need not be hotpatched.
5225 - Making the main function hotpatchable is useless. */
5228 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
5229 if (attr || s390_hotpatch_trampoline_halfwords >= 0)
5231 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (decl)))
5234 warning (OPT_Wattributes, "function %qE with the %qs attribute"
5235 " is not hotpatchable", DECL_NAME (decl), "always_inline");
5241 get_hotpatch_attribute (attr) : s390_hotpatch_trampoline_halfwords;
5248 /* Hook to determine if one function can safely inline another. */
5251 s390_can_inline_p (tree caller, tree callee)
5253 if (s390_function_num_hotpatch_trampoline_halfwords (callee, false) >= 0)
5256 return default_target_can_inline_p (caller, callee);
5259 /* Write the extra assembler code needed to declare a function properly. */
5262 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
5265 int hotpatch_trampoline_halfwords = -1;
5269 hotpatch_trampoline_halfwords =
5270 s390_function_num_hotpatch_trampoline_halfwords (decl, true);
5271 if (hotpatch_trampoline_halfwords >= 0
5272 && decl_function_context (decl) != NULL_TREE)
5274 warning_at (DECL_SOURCE_LOCATION (decl), OPT_mhotpatch,
5275 "hotpatching is not compatible with nested functions");
5276 hotpatch_trampoline_halfwords = -1;
5280 if (hotpatch_trampoline_halfwords > 0)
5284 /* Add a trampoline code area before the function label and initialize it
5285 with two-byte nop instructions. This area can be overwritten with code
5286 that jumps to a patched version of the function. */
5287 for (i = 0; i < hotpatch_trampoline_halfwords; i++)
5288 asm_fprintf (asm_out_file, "\tnopr\t%%r7\n");
5289 /* Note: The function label must be aligned so that (a) the bytes of the
5290 following nop do not cross a cacheline boundary, and (b) a jump address
5291 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
5292 stored directly before the label without crossing a cacheline
5293 boundary. All this is necessary to make sure the trampoline code can
5294 be changed atomically. */
5297 ASM_OUTPUT_LABEL (asm_out_file, fname);
5299 /* Output a four-byte nop if hotpatching is enabled. This can be overwritten
5300 atomically with a relative backwards jump to the trampoline area. */
5301 if (hotpatch_trampoline_halfwords >= 0)
5302 asm_fprintf (asm_out_file, "\tnop\t0\n");
5305 /* Output machine-dependent UNSPECs occurring in address constant X
5306 in assembler syntax to stdio stream FILE. Returns true if the
5307 constant X could be recognized, false otherwise. */
5310 s390_output_addr_const_extra (FILE *file, rtx x)
5312 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5313 switch (XINT (x, 1))
5316 output_addr_const (file, XVECEXP (x, 0, 0));
5317 fprintf (file, "@GOTENT");
5320 output_addr_const (file, XVECEXP (x, 0, 0));
5321 fprintf (file, "@GOT");
5324 output_addr_const (file, XVECEXP (x, 0, 0));
5325 fprintf (file, "@GOTOFF");
5328 output_addr_const (file, XVECEXP (x, 0, 0));
5329 fprintf (file, "@PLT");
5332 output_addr_const (file, XVECEXP (x, 0, 0));
5333 fprintf (file, "@PLTOFF");
5336 output_addr_const (file, XVECEXP (x, 0, 0));
5337 fprintf (file, "@TLSGD");
5340 assemble_name (file, get_some_local_dynamic_name ());
5341 fprintf (file, "@TLSLDM");
5344 output_addr_const (file, XVECEXP (x, 0, 0));
5345 fprintf (file, "@DTPOFF");
5348 output_addr_const (file, XVECEXP (x, 0, 0));
5349 fprintf (file, "@NTPOFF");
5351 case UNSPEC_GOTNTPOFF:
5352 output_addr_const (file, XVECEXP (x, 0, 0));
5353 fprintf (file, "@GOTNTPOFF");
5355 case UNSPEC_INDNTPOFF:
5356 output_addr_const (file, XVECEXP (x, 0, 0));
5357 fprintf (file, "@INDNTPOFF");
5361 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5362 switch (XINT (x, 1))
5364 case UNSPEC_POOL_OFFSET:
5365 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5366 output_addr_const (file, x);
5372 /* Output address operand ADDR in assembler syntax to
5373 stdio stream FILE. */
5376 print_operand_address (FILE *file, rtx addr)
5378 struct s390_address ad;
5380 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5384 output_operand_lossage ("symbolic memory references are "
5385 "only supported on z10 or later");
5388 output_addr_const (file, addr);
5392 if (!s390_decompose_address (addr, &ad)
5393 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5394 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5395 output_operand_lossage ("cannot decompose address");
5398 output_addr_const (file, ad.disp);
5400 fprintf (file, "0");
5402 if (ad.base && ad.indx)
5403 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5404 reg_names[REGNO (ad.base)]);
5406 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5409 /* Output operand X in assembler syntax to stdio stream FILE.
5410 CODE specifies the format flag. The following format flags are recognized:
5413 'C': print opcode suffix for branch condition.
5414 'D': print opcode suffix for inverse branch condition.
5415 'E': print opcode suffix for branch on index instruction.
5416 'G': print the size of the operand in bytes.
5417 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5418 'M': print the second word of a TImode operand.
5419 'N': print the second word of a DImode operand.
5420 'O': print only the displacement of a memory reference.
5421 'R': print only the base register of a memory reference.
5422 'S': print S-type memory reference (base+displacement).
5423 'Y': print shift count operand.
5425 'b': print integer X as if it's an unsigned byte.
5426 'c': print integer X as if it's a signed byte.
5427 'e': "end" of DImode contiguous bitmask X.
5428 'f': "end" of SImode contiguous bitmask X.
5429 'h': print integer X as if it's a signed halfword.
5430 'i': print the first nonzero HImode part of X.
5431 'j': print the first HImode part unequal to -1 of X.
5432 'k': print the first nonzero SImode part of X.
5433 'm': print the first SImode part unequal to -1 of X.
5434 'o': print integer X as if it's an unsigned 32-bit word.
5435 's': "start" of DImode contiguous bitmask X.
5436 't': "start" of SImode contiguous bitmask X.
5437 'x': print integer X as if it's an unsigned halfword.
5441 print_operand (FILE *file, rtx x, int code)
5448 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5452 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5456 if (GET_CODE (x) == LE)
5457 fprintf (file, "l");
5458 else if (GET_CODE (x) == GT)
5459 fprintf (file, "h");
5461 output_operand_lossage ("invalid comparison operator "
5462 "for 'E' output modifier");
5466 if (GET_CODE (x) == SYMBOL_REF)
5468 fprintf (file, "%s", ":tls_load:");
5469 output_addr_const (file, x);
5471 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5473 fprintf (file, "%s", ":tls_gdcall:");
5474 output_addr_const (file, XVECEXP (x, 0, 0));
5476 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5478 fprintf (file, "%s", ":tls_ldcall:");
5479 const char *name = get_some_local_dynamic_name ();
5481 assemble_name (file, name);
5484 output_operand_lossage ("invalid reference for 'J' output modifier");
5488 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5493 struct s390_address ad;
5498 output_operand_lossage ("memory reference expected for "
5499 "'O' output modifier");
5503 ret = s390_decompose_address (XEXP (x, 0), &ad);
5506 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5509 output_operand_lossage ("invalid address for 'O' output modifier");
5514 output_addr_const (file, ad.disp);
5516 fprintf (file, "0");
5522 struct s390_address ad;
5527 output_operand_lossage ("memory reference expected for "
5528 "'R' output modifier");
5532 ret = s390_decompose_address (XEXP (x, 0), &ad);
5535 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5538 output_operand_lossage ("invalid address for 'R' output modifier");
5543 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5545 fprintf (file, "0");
5551 struct s390_address ad;
5556 output_operand_lossage ("memory reference expected for "
5557 "'S' output modifier");
5560 ret = s390_decompose_address (XEXP (x, 0), &ad);
5563 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5566 output_operand_lossage ("invalid address for 'S' output modifier");
5571 output_addr_const (file, ad.disp);
5573 fprintf (file, "0");
5576 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5581 if (GET_CODE (x) == REG)
5582 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5583 else if (GET_CODE (x) == MEM)
5584 x = change_address (x, VOIDmode,
5585 plus_constant (Pmode, XEXP (x, 0), 4));
5587 output_operand_lossage ("register or memory expression expected "
5588 "for 'N' output modifier");
5592 if (GET_CODE (x) == REG)
5593 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5594 else if (GET_CODE (x) == MEM)
5595 x = change_address (x, VOIDmode,
5596 plus_constant (Pmode, XEXP (x, 0), 8));
5598 output_operand_lossage ("register or memory expression expected "
5599 "for 'M' output modifier");
5603 print_shift_count_operand (file, x);
5607 switch (GET_CODE (x))
5610 fprintf (file, "%s", reg_names[REGNO (x)]);
5614 output_address (XEXP (x, 0));
5621 output_addr_const (file, x);
5634 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5640 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5643 ival = s390_extract_part (x, HImode, 0);
5646 ival = s390_extract_part (x, HImode, -1);
5649 ival = s390_extract_part (x, SImode, 0);
5652 ival = s390_extract_part (x, SImode, -1);
5663 len = (code == 's' || code == 'e' ? 64 : 32);
5664 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5666 if (code == 's' || code == 't')
5667 ival = 64 - pos - len;
5669 ival = 64 - 1 - pos;
5673 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5675 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5679 gcc_assert (GET_MODE (x) == VOIDmode);
5681 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5682 else if (code == 'x')
5683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5684 else if (code == 'h')
5685 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5686 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5690 output_operand_lossage ("invalid constant - try using "
5691 "an output modifier");
5693 output_operand_lossage ("invalid constant for output modifier '%c'",
5700 output_operand_lossage ("invalid expression - try using "
5701 "an output modifier");
5703 output_operand_lossage ("invalid expression for output "
5704 "modifier '%c'", code);
5709 /* Target hook for assembling integer objects. We need to define it
5710 here to work around a bug in some versions of GAS, which couldn't
5711 handle values smaller than INT_MIN when printed in decimal. */
5714 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5716 if (size == 8 && aligned_p
5717 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5719 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5723 return default_assemble_integer (x, size, aligned_p);
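/* Illustrative: an aligned 8-byte CONST_INT whose value is below
   INT_MIN, say the most negative 64-bit integer, is emitted as

     .quad  0x8000000000000000

   in hex rather than decimal, sidestepping the GAS bug.  */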
5726 /* Returns true if register REGNO is used for forming
5727 a memory address in expression X. */
5730 reg_used_in_mem_p (int regno, rtx x)
5732 enum rtx_code code = GET_CODE (x);
5738 if (refers_to_regno_p (regno, regno+1,
5742 else if (code == SET
5743 && GET_CODE (SET_DEST (x)) == PC)
5745 if (refers_to_regno_p (regno, regno+1,
5750 fmt = GET_RTX_FORMAT (code);
5751 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5754 && reg_used_in_mem_p (regno, XEXP (x, i)))
5757 else if (fmt[i] == 'E')
5758 for (j = 0; j < XVECLEN (x, i); j++)
5759 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5765 /* Returns true if expression DEP_RTX sets an address register
5766 used by instruction INSN to address memory. */
5769 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
5773 if (NONJUMP_INSN_P (dep_rtx))
5774 dep_rtx = PATTERN (dep_rtx);
5776 if (GET_CODE (dep_rtx) == SET)
5778 target = SET_DEST (dep_rtx);
5779 if (GET_CODE (target) == STRICT_LOW_PART)
5780 target = XEXP (target, 0);
5781 while (GET_CODE (target) == SUBREG)
5782 target = SUBREG_REG (target);
5784 if (GET_CODE (target) == REG)
5786 int regno = REGNO (target);
5788 if (s390_safe_attr_type (insn) == TYPE_LA)
5790 pat = PATTERN (insn);
5791 if (GET_CODE (pat) == PARALLEL)
5793 gcc_assert (XVECLEN (pat, 0) == 2);
5794 pat = XVECEXP (pat, 0, 0);
5796 gcc_assert (GET_CODE (pat) == SET);
5797 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5799 else if (get_attr_atype (insn) == ATYPE_AGEN)
5800 return reg_used_in_mem_p (regno, PATTERN (insn));
5806 /* Return 1 if dep_insn sets a register used by insn in the agen unit. */
5809 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
5811 rtx dep_rtx = PATTERN (dep_insn);
5814 if (GET_CODE (dep_rtx) == SET
5815 && addr_generation_dependency_p (dep_rtx, insn))
5817 else if (GET_CODE (dep_rtx) == PARALLEL)
5819 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5821 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5829 /* A C statement (sans semicolon) to update the integer scheduling priority
5830 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
5831 reduce the priority to execute INSN later. Do not define this macro if
5832 you do not need to adjust the scheduling priorities of insns.
5834 A STD instruction should be scheduled earlier,
5835 in order to use the bypass. */
5837 s390_adjust_priority (rtx_insn *insn, int priority)
5839 if (! INSN_P (insn))
5842 if (s390_tune != PROCESSOR_2084_Z990
5843 && s390_tune != PROCESSOR_2094_Z9_109
5844 && s390_tune != PROCESSOR_2097_Z10
5845 && s390_tune != PROCESSOR_2817_Z196
5846 && s390_tune != PROCESSOR_2827_ZEC12)
5849 switch (s390_safe_attr_type (insn))
5853 priority = priority << 3;
5857 priority = priority << 1;
5866 /* The number of instructions that can be issued per cycle. */
5869 s390_issue_rate (void)
5873 case PROCESSOR_2084_Z990:
5874 case PROCESSOR_2094_Z9_109:
5875 case PROCESSOR_2817_Z196:
5877 case PROCESSOR_2097_Z10:
5878 case PROCESSOR_2827_ZEC12:
5886 s390_first_cycle_multipass_dfa_lookahead (void)
5891 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5892 Fix up MEMs as required. */
5895 annotate_constant_pool_refs (rtx *x)
5900 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5901 || !CONSTANT_POOL_ADDRESS_P (*x));
5903 /* Literal pool references can only occur inside a MEM ... */
5904 if (GET_CODE (*x) == MEM)
5906 rtx memref = XEXP (*x, 0);
5908 if (GET_CODE (memref) == SYMBOL_REF
5909 && CONSTANT_POOL_ADDRESS_P (memref))
5911 rtx base = cfun->machine->base_reg;
5912 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5915 *x = replace_equiv_address (*x, addr);
5919 if (GET_CODE (memref) == CONST
5920 && GET_CODE (XEXP (memref, 0)) == PLUS
5921 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5922 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5923 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5925 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5926 rtx sym = XEXP (XEXP (memref, 0), 0);
5927 rtx base = cfun->machine->base_reg;
5928 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5931 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5936 /* ... or a load-address type pattern. */
5937 if (GET_CODE (*x) == SET)
5939 rtx addrref = SET_SRC (*x);
5941 if (GET_CODE (addrref) == SYMBOL_REF
5942 && CONSTANT_POOL_ADDRESS_P (addrref))
5944 rtx base = cfun->machine->base_reg;
5945 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5948 SET_SRC (*x) = addr;
5952 if (GET_CODE (addrref) == CONST
5953 && GET_CODE (XEXP (addrref, 0)) == PLUS
5954 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5955 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5956 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5958 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5959 rtx sym = XEXP (XEXP (addrref, 0), 0);
5960 rtx base = cfun->machine->base_reg;
5961 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5964 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5969 /* Annotate LTREL_BASE as well. */
5970 if (GET_CODE (*x) == UNSPEC
5971 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5973 rtx base = cfun->machine->base_reg;
5974 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5979 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5980 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5984 annotate_constant_pool_refs (&XEXP (*x, i));
5986 else if (fmt[i] == 'E')
5988 for (j = 0; j < XVECLEN (*x, i); j++)
5989 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
5994 /* Split all branches that exceed the maximum distance.
5995 Returns true if this created a new literal pool entry. */
5998 s390_split_branches (void)
6000 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
6001 int new_literal = 0, ret;
6006 /* We need correct insn addresses. */
6008 shorten_branches (get_insns ());
6010 /* Find all branches that exceed 64KB, and split them. */
6012 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6014 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
6017 pat = PATTERN (insn);
6018 if (GET_CODE (pat) == PARALLEL)
6019 pat = XVECEXP (pat, 0, 0);
6020 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
6023 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
6025 label = &SET_SRC (pat);
6027 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6029 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6030 label = &XEXP (SET_SRC (pat), 1);
6031 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6032 label = &XEXP (SET_SRC (pat), 2);
6039 if (get_attr_length (insn) <= 4)
6042 /* We are going to use the return register as a scratch register;
6043 make sure it will be saved/restored by the prologue/epilogue. */
6044 cfun_frame_layout.save_return_addr_p = 1;
6049 rtx mem = force_const_mem (Pmode, *label);
6050 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn);
6051 INSN_ADDRESSES_NEW (set_insn, -1);
6052 annotate_constant_pool_refs (&PATTERN (set_insn));
6059 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6060 UNSPEC_LTREL_OFFSET);
6061 target = gen_rtx_CONST (Pmode, target);
6062 target = force_const_mem (Pmode, target);
6063 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6064 INSN_ADDRESSES_NEW (set_insn, -1);
6065 annotate_constant_pool_refs (&PATTERN (set_insn));
6067 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6068 cfun->machine->base_reg),
6070 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6073 ret = validate_change (insn, label, target, 0);
6081 /* Find an annotated literal pool symbol referenced in RTX X,
6082 and store it at REF. Will abort if X contains references to
6083 more than one such pool symbol; multiple references to the same
6084 symbol are allowed, however.
6086 The rtx pointed to by REF must be initialized to NULL_RTX
6087 by the caller before calling this routine. */
6090 find_constant_pool_ref (rtx x, rtx *ref)
6095 /* Ignore LTREL_BASE references. */
6096 if (GET_CODE (x) == UNSPEC
6097 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6099 /* Likewise POOL_ENTRY insns. */
6100 if (GET_CODE (x) == UNSPEC_VOLATILE
6101 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6104 gcc_assert (GET_CODE (x) != SYMBOL_REF
6105 || !CONSTANT_POOL_ADDRESS_P (x));
6107 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6109 rtx sym = XVECEXP (x, 0, 0);
6110 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6111 && CONSTANT_POOL_ADDRESS_P (sym));
6113 if (*ref == NULL_RTX)
6116 gcc_assert (*ref == sym);
6121 fmt = GET_RTX_FORMAT (GET_CODE (x));
6122 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6126 find_constant_pool_ref (XEXP (x, i), ref);
6128 else if (fmt[i] == 'E')
6130 for (j = 0; j < XVECLEN (x, i); j++)
6131 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6136 /* Replace every reference to the annotated literal pool
6137 symbol REF in X by its base plus OFFSET. */
6140 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6145 gcc_assert (*x != ref);
6147 if (GET_CODE (*x) == UNSPEC
6148 && XINT (*x, 1) == UNSPEC_LTREF
6149 && XVECEXP (*x, 0, 0) == ref)
6151 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6155 if (GET_CODE (*x) == PLUS
6156 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6157 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6158 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6159 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6161 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6162 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6166 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6167 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6171 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6173 else if (fmt[i] == 'E')
6175 for (j = 0; j < XVECLEN (*x, i); j++)
6176 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6181 /* Check whether X contains an UNSPEC_LTREL_BASE.
6182 Return its constant pool symbol if found, NULL_RTX otherwise. */
6185 find_ltrel_base (rtx x)
6190 if (GET_CODE (x) == UNSPEC
6191 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6192 return XVECEXP (x, 0, 0);
6194 fmt = GET_RTX_FORMAT (GET_CODE (x));
6195 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6199 rtx fnd = find_ltrel_base (XEXP (x, i));
6203 else if (fmt[i] == 'E')
6205 for (j = 0; j < XVECLEN (x, i); j++)
6207 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6217 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6220 replace_ltrel_base (rtx *x)
6225 if (GET_CODE (*x) == UNSPEC
6226 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6228 *x = XVECEXP (*x, 0, 1);
6232 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6233 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6237 replace_ltrel_base (&XEXP (*x, i));
6239 else if (fmt[i] == 'E')
6241 for (j = 0; j < XVECLEN (*x, i); j++)
6242 replace_ltrel_base (&XVECEXP (*x, i, j));
6248 /* We keep a list of constants which we have to add to internal
6249 constant tables in the middle of large functions. */
6251 #define NR_C_MODES 11
6252 enum machine_mode constant_modes[NR_C_MODES] =
6254 TFmode, TImode, TDmode,
6255 DFmode, DImode, DDmode,
6256 SFmode, SImode, SDmode,
6263 struct constant *next;
6265 rtx_code_label *label;
6268 struct constant_pool
6270 struct constant_pool *next;
6271 rtx_insn *first_insn;
6272 rtx_insn *pool_insn;
6274 rtx_insn *emit_pool_after;
6276 struct constant *constants[NR_C_MODES];
6277 struct constant *execute;
6278 rtx_code_label *label;
6282 /* Allocate new constant_pool structure. */
6284 static struct constant_pool *
6285 s390_alloc_pool (void)
6287 struct constant_pool *pool;
6290 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6292 for (i = 0; i < NR_C_MODES; i++)
6293 pool->constants[i] = NULL;
6295 pool->execute = NULL;
6296 pool->label = gen_label_rtx ();
6297 pool->first_insn = NULL;
6298 pool->pool_insn = NULL;
6299 pool->insns = BITMAP_ALLOC (NULL);
6301 pool->emit_pool_after = NULL;
6306 /* Create new constant pool covering instructions starting at INSN
6307 and chain it to the end of POOL_LIST. */
6309 static struct constant_pool *
6310 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
6312 struct constant_pool *pool, **prev;
6314 pool = s390_alloc_pool ();
6315 pool->first_insn = insn;
6317 for (prev = pool_list; *prev; prev = &(*prev)->next)
6324 /* End range of instructions covered by POOL at INSN and emit
6325 placeholder insn representing the pool. */
6328 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
6330 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6333 insn = get_last_insn ();
6335 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6336 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6339 /* Add INSN to the list of insns covered by POOL. */
6342 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6344 bitmap_set_bit (pool->insns, INSN_UID (insn));
6347 /* Return pool out of POOL_LIST that covers INSN. */
6349 static struct constant_pool *
6350 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6352 struct constant_pool *pool;
6354 for (pool = pool_list; pool; pool = pool->next)
6355 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6361 /* Add constant VAL of mode MODE to the constant pool POOL. */
6364 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6369 for (i = 0; i < NR_C_MODES; i++)
6370 if (constant_modes[i] == mode)
6372 gcc_assert (i != NR_C_MODES);
6374 for (c = pool->constants[i]; c != NULL; c = c->next)
6375 if (rtx_equal_p (val, c->value))
6380 c = (struct constant *) xmalloc (sizeof *c);
6382 c->label = gen_label_rtx ();
6383 c->next = pool->constants[i];
6384 pool->constants[i] = c;
6385 pool->size += GET_MODE_SIZE (mode);
6389 /* Return an rtx that represents the offset of X from the start of
6393 s390_pool_offset (struct constant_pool *pool, rtx x)
6397 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6398 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6399 UNSPEC_POOL_OFFSET);
6400 return gen_rtx_CONST (GET_MODE (x), x);
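/* Roughly, the rtx returned above has the shape

     (const (unspec [X (label_ref POOL_LABEL)] UNSPEC_POOL_OFFSET))

   which the UNSPEC_POOL_OFFSET case in s390_output_addr_const_extra
   later renders as the difference X - POOL_LABEL.  */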
6403 /* Find constant VAL of mode MODE in the constant pool POOL.
6404 Return an RTX describing the distance from the start of
6405 the pool to the location of the new constant. */
6408 s390_find_constant (struct constant_pool *pool, rtx val,
6409 enum machine_mode mode)
6414 for (i = 0; i < NR_C_MODES; i++)
6415 if (constant_modes[i] == mode)
6417 gcc_assert (i != NR_C_MODES);
6419 for (c = pool->constants[i]; c != NULL; c = c->next)
6420 if (rtx_equal_p (val, c->value))
6425 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6428 /* Check whether INSN is an execute. Return the label_ref to its
6429 execute target template if so, NULL_RTX otherwise. */
6432 s390_execute_label (rtx insn)
6434 if (NONJUMP_INSN_P (insn)
6435 && GET_CODE (PATTERN (insn)) == PARALLEL
6436 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6437 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6438 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6443 /* Add execute target for INSN to the constant pool POOL. */
6446 s390_add_execute (struct constant_pool *pool, rtx insn)
6450 for (c = pool->execute; c != NULL; c = c->next)
6451 if (INSN_UID (insn) == INSN_UID (c->value))
6456 c = (struct constant *) xmalloc (sizeof *c);
6458 c->label = gen_label_rtx ();
6459 c->next = pool->execute;
6465 /* Find execute target for INSN in the constant pool POOL.
6466 Return an RTX describing the distance from the start of
6467 the pool to the location of the execute target. */
6470 s390_find_execute (struct constant_pool *pool, rtx insn)
6474 for (c = pool->execute; c != NULL; c = c->next)
6475 if (INSN_UID (insn) == INSN_UID (c->value))
6480 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6483 /* For an execute INSN, extract the execute target template. */
6486 s390_execute_target (rtx insn)
6488 rtx pattern = PATTERN (insn);
6489 gcc_assert (s390_execute_label (insn));
6491 if (XVECLEN (pattern, 0) == 2)
6493 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6497 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6500 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6501 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6503 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6509 /* Indicate that INSN cannot be duplicated. This is the case for
6510 execute insns that carry a unique label. */
6513 s390_cannot_copy_insn_p (rtx_insn *insn)
6515 rtx label = s390_execute_label (insn);
6516 return label && label != const0_rtx;
6519 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6520 do not emit the pool base label. */
6523 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6526 rtx_insn *insn = pool->pool_insn;
6529 /* Switch to rodata section. */
6530 if (TARGET_CPU_ZARCH)
6532 insn = emit_insn_after (gen_pool_section_start (), insn);
6533 INSN_ADDRESSES_NEW (insn, -1);
6536 /* Ensure minimum pool alignment. */
6537 if (TARGET_CPU_ZARCH)
6538 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6540 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6541 INSN_ADDRESSES_NEW (insn, -1);
6543 /* Emit pool base label. */
6546 insn = emit_label_after (pool->label, insn);
6547 INSN_ADDRESSES_NEW (insn, -1);
6550 /* Dump constants in descending alignment requirement order,
6551 ensuring proper alignment for every constant. */
6552 for (i = 0; i < NR_C_MODES; i++)
6553 for (c = pool->constants[i]; c; c = c->next)
6555 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6556 rtx value = copy_rtx (c->value);
6557 if (GET_CODE (value) == CONST
6558 && GET_CODE (XEXP (value, 0)) == UNSPEC
6559 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6560 && XVECLEN (XEXP (value, 0), 0) == 1)
6561 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6563 insn = emit_label_after (c->label, insn);
6564 INSN_ADDRESSES_NEW (insn, -1);
6566 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6567 gen_rtvec (1, value),
6568 UNSPECV_POOL_ENTRY);
6569 insn = emit_insn_after (value, insn);
6570 INSN_ADDRESSES_NEW (insn, -1);
6573 /* Ensure minimum alignment for instructions. */
6574 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6575 INSN_ADDRESSES_NEW (insn, -1);
6577 /* Output in-pool execute template insns. */
6578 for (c = pool->execute; c; c = c->next)
6580 insn = emit_label_after (c->label, insn);
6581 INSN_ADDRESSES_NEW (insn, -1);
6583 insn = emit_insn_after (s390_execute_target (c->value), insn);
6584 INSN_ADDRESSES_NEW (insn, -1);
6587 /* Switch back to previous section. */
6588 if (TARGET_CPU_ZARCH)
6590 insn = emit_insn_after (gen_pool_section_end (), insn);
6591 INSN_ADDRESSES_NEW (insn, -1);
6594 insn = emit_barrier_after (insn);
6595 INSN_ADDRESSES_NEW (insn, -1);
6597 /* Remove placeholder insn. */
6598 remove_insn (pool->pool_insn);
6601 /* Free all memory used by POOL. */
6604 s390_free_pool (struct constant_pool *pool)
6606 struct constant *c, *next;
6609 for (i = 0; i < NR_C_MODES; i++)
6610 for (c = pool->constants[i]; c; c = next)
6616 for (c = pool->execute; c; c = next)
6622 BITMAP_FREE (pool->insns);
6627 /* Collect main literal pool. Return NULL on overflow. */
6629 static struct constant_pool *
6630 s390_mainpool_start (void)
6632 struct constant_pool *pool;
6635 pool = s390_alloc_pool ();
6637 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6639 if (NONJUMP_INSN_P (insn)
6640 && GET_CODE (PATTERN (insn)) == SET
6641 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6642 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6644 /* There might be two main_pool instructions if base_reg
6645 is call-clobbered; one for shrink-wrapped code and one
6646 for the rest. We want to keep the first. */
6647 if (pool->pool_insn)
6649 insn = PREV_INSN (insn);
6650 delete_insn (NEXT_INSN (insn));
6653 pool->pool_insn = insn;
6656 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6658 s390_add_execute (pool, insn);
6660 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6662 rtx pool_ref = NULL_RTX;
6663 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6666 rtx constant = get_pool_constant (pool_ref);
6667 enum machine_mode mode = get_pool_mode (pool_ref);
6668 s390_add_constant (pool, constant, mode);
6672 /* If hot/cold partitioning is enabled we have to make sure that
6673 the literal pool is emitted in the same section where the
6674 initialization of the literal pool base pointer takes place.
6675 emit_pool_after is only used in the non-overflow case on
6676 non-z CPUs where we can emit the literal pool at the end of the
6677 function body within the text section. */
6679 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6680 && !pool->emit_pool_after)
6681 pool->emit_pool_after = PREV_INSN (insn);
6684 gcc_assert (pool->pool_insn || pool->size == 0);
6686 if (pool->size >= 4096)
6688 /* We're going to chunkify the pool, so remove the main
6689 pool placeholder insn. */
6690 remove_insn (pool->pool_insn);
6692 s390_free_pool (pool);
6696 /* If the function ends with the section where the literal pool
6697 should be emitted, set the marker to its end. */
6698 if (pool && !pool->emit_pool_after)
6699 pool->emit_pool_after = get_last_insn ();
6704 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6705 Modify the current function to output the pool constants as well as
6706 the pool register setup instruction. */
6709 s390_mainpool_finish (struct constant_pool *pool)
6711 rtx base_reg = cfun->machine->base_reg;
6713 /* If the pool is empty, we're done. */
6714 if (pool->size == 0)
6716 /* We don't actually need a base register after all. */
6717 cfun->machine->base_reg = NULL_RTX;
6719 if (pool->pool_insn)
6720 remove_insn (pool->pool_insn);
6721 s390_free_pool (pool);
6725 /* We need correct insn addresses. */
6726 shorten_branches (get_insns ());
6728 /* On zSeries, we use a LARL to load the pool register. The pool is
6729 located in the .rodata section, so we emit it after the function. */
6730 if (TARGET_CPU_ZARCH)
6732 rtx set = gen_main_base_64 (base_reg, pool->label);
6733 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
6734 INSN_ADDRESSES_NEW (insn, -1);
6735 remove_insn (pool->pool_insn);
6737 insn = get_last_insn ();
6738 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6739 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6741 s390_dump_pool (pool, 0);
6744 /* On S/390, if the total size of the function's code plus literal pool
6745 does not exceed 4096 bytes, we use BASR to set up a function base
6746 pointer, and emit the literal pool at the end of the function. */
6747 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6748 + pool->size + 8 /* alignment slop */ < 4096)
6750 rtx set = gen_main_base_31_small (base_reg, pool->label);
6751 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
6752 INSN_ADDRESSES_NEW (insn, -1);
6753 remove_insn (pool->pool_insn);
6755 insn = emit_label_after (pool->label, insn);
6756 INSN_ADDRESSES_NEW (insn, -1);
6758 /* emit_pool_after will be set by s390_mainpool_start to the
6759 last insn of the section where the literal pool should be emitted. */
6761 insn = pool->emit_pool_after;
6763 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6764 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6766 s390_dump_pool (pool, 1);
6769 /* Otherwise, we emit an inline literal pool and use BASR to branch
6770 over it, setting up the pool register at the same time. */
6773 rtx_code_label *pool_end = gen_label_rtx ();
6775 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
6776 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
6777 JUMP_LABEL (insn) = pool_end;
6778 INSN_ADDRESSES_NEW (insn, -1);
6779 remove_insn (pool->pool_insn);
6781 insn = emit_label_after (pool->label, insn);
6782 INSN_ADDRESSES_NEW (insn, -1);
6784 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6785 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6787 insn = emit_label_after (pool_end, pool->pool_insn);
6788 INSN_ADDRESSES_NEW (insn, -1);
6790 s390_dump_pool (pool, 1);
6794 /* Replace all literal pool references. */
6796 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6799 replace_ltrel_base (&PATTERN (insn));
6801 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6803 rtx addr, pool_ref = NULL_RTX;
6804 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6807 if (s390_execute_label (insn))
6808 addr = s390_find_execute (pool, insn);
6810 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6811 get_pool_mode (pool_ref));
6813 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6814 INSN_CODE (insn) = -1;
6820 /* Free the pool. */
6821 s390_free_pool (pool);
6824 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6825 We have decided we cannot use this pool, so revert all changes
6826 to the current function that were done by s390_mainpool_start. */
6828 s390_mainpool_cancel (struct constant_pool *pool)
6830 /* We didn't actually change the instruction stream, so simply
6831 free the pool memory. */
6832 s390_free_pool (pool);
6836 /* Chunkify the literal pool. */
6838 #define S390_POOL_CHUNK_MIN 0xc00
6839 #define S390_POOL_CHUNK_MAX 0xe00
6841 static struct constant_pool *
6842 s390_chunkify_start (void)
6844 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6847 rtx pending_ltrel = NULL_RTX;
6850 rtx (*gen_reload_base) (rtx, rtx) =
6851 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6854 /* We need correct insn addresses. */
6856 shorten_branches (get_insns ());
6858 /* Scan all insns and move literals to pool chunks. */
6860 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6862 bool section_switch_p = false;
6864 /* Check for pending LTREL_BASE. */
6867 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6870 gcc_assert (ltrel_base == pending_ltrel);
6871 pending_ltrel = NULL_RTX;
6875 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6878 curr_pool = s390_start_pool (&pool_list, insn);
6880 s390_add_execute (curr_pool, insn);
6881 s390_add_pool_insn (curr_pool, insn);
6883 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6885 rtx pool_ref = NULL_RTX;
6886 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6889 rtx constant = get_pool_constant (pool_ref);
6890 enum machine_mode mode = get_pool_mode (pool_ref);
6893 curr_pool = s390_start_pool (&pool_list, insn);
6895 s390_add_constant (curr_pool, constant, mode);
6896 s390_add_pool_insn (curr_pool, insn);
6898 /* Don't split the pool chunk between a LTREL_OFFSET load
6899 and the corresponding LTREL_BASE. */
6900 if (GET_CODE (constant) == CONST
6901 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6902 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6904 gcc_assert (!pending_ltrel);
6905 pending_ltrel = pool_ref;
6910 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
6913 s390_add_pool_insn (curr_pool, insn);
6914 /* An LTREL_BASE must follow within the same basic block. */
6915 gcc_assert (!pending_ltrel);
6919 switch (NOTE_KIND (insn))
6921 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6922 section_switch_p = true;
6924 case NOTE_INSN_VAR_LOCATION:
6925 case NOTE_INSN_CALL_ARG_LOCATION:
6932 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6933 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6936 if (TARGET_CPU_ZARCH)
6938 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6941 s390_end_pool (curr_pool, NULL);
6946 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6947 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6950 /* We will later have to insert base register reload insns.
6951 Those will have an effect on code size, which we need to
6952 consider here. This calculation makes rather pessimistic
6953 worst-case assumptions. */
6957 if (chunk_size < S390_POOL_CHUNK_MIN
6958 && curr_pool->size < S390_POOL_CHUNK_MIN
6959 && !section_switch_p)
6962 /* Pool chunks can only be inserted after BARRIERs ... */
6963 if (BARRIER_P (insn))
6965 s390_end_pool (curr_pool, insn);
6970 /* ... so if we don't find one in time, create one. */
6971 else if (chunk_size > S390_POOL_CHUNK_MAX
6972 || curr_pool->size > S390_POOL_CHUNK_MAX
6973 || section_switch_p)
6975 rtx_insn *label, *jump, *barrier, *next, *prev;
6977 if (!section_switch_p)
6979 /* We can insert the barrier only after a 'real' insn. */
6980 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
6982 if (get_attr_length (insn) == 0)
6984 /* Don't separate LTREL_BASE from the corresponding
6985 LTREL_OFFSET load. */
6992 next = NEXT_INSN (insn);
6996 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6997 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
7001 gcc_assert (!pending_ltrel);
7003 /* The old pool has to end before the section switch
7004 note in order to make it part of the current section. */
7006 insn = PREV_INSN (insn);
7009 label = gen_label_rtx ();
7011 if (prev && NOTE_P (prev))
7012 prev = prev_nonnote_insn (prev);
7014 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
7015 INSN_LOCATION (prev));
7017 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
7018 barrier = emit_barrier_after (jump);
7019 insn = emit_label_after (label, barrier);
7020 JUMP_LABEL (jump) = label;
7021 LABEL_NUSES (label) = 1;
7023 INSN_ADDRESSES_NEW (jump, -1);
7024 INSN_ADDRESSES_NEW (barrier, -1);
7025 INSN_ADDRESSES_NEW (insn, -1);
7027 s390_end_pool (curr_pool, barrier);
7035 s390_end_pool (curr_pool, NULL);
7036 gcc_assert (!pending_ltrel);
7038 /* Find all labels that are branched into
7039 from an insn belonging to a different chunk. */
7041 far_labels = BITMAP_ALLOC (NULL);
7043 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7045 rtx_jump_table_data *table;
7047 /* Labels marked with LABEL_PRESERVE_P can be the target
7048 of non-local jumps, so we have to mark them.
7049 The same holds for named labels.
7051 Don't do that, however, if it is the label before a jump table.
7055 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7057 rtx_insn *vec_insn = NEXT_INSN (insn);
7058 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7059 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7061 /* Check potential targets in a table jump (casesi_jump). */
7062 else if (tablejump_p (insn, NULL, &table))
7064 rtx vec_pat = PATTERN (table);
7065 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7067 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7069 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7071 if (s390_find_pool (pool_list, label)
7072 != s390_find_pool (pool_list, insn))
7073 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7076 /* If we have a direct jump (conditional or unconditional),
7077 check all potential targets. */
7078 else if (JUMP_P (insn))
7080 rtx pat = PATTERN (insn);
7082 if (GET_CODE (pat) == PARALLEL)
7083 pat = XVECEXP (pat, 0, 0);
7085 if (GET_CODE (pat) == SET)
7087 rtx label = JUMP_LABEL (insn);
7088 if (label && !ANY_RETURN_P (label))
7090 if (s390_find_pool (pool_list, label)
7091 != s390_find_pool (pool_list, insn))
7092 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7098 /* Insert base register reload insns before every pool. */
7100 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7102 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7104 rtx_insn *insn = curr_pool->first_insn;
7105 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7108 /* Insert base register reload insns at every far label. */
7110 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7112 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7114 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7117 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7119 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7124 BITMAP_FREE (far_labels);
7127 /* Recompute insn addresses. */
7129 init_insn_lengths ();
7130 shorten_branches (get_insns ());
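/* Illustrative sizing: with S390_POOL_CHUNK_MIN == 0xc00 and
   S390_POOL_CHUNK_MAX == 0xe00, chunks close at roughly 3.0K-3.5K
   bytes, leaving headroom below the 4096-byte displacement limit
   even after the pessimistic reload-insn estimate above.  */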
7135 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7136 After we have decided to use this list, finish implementing
7137 all changes to the current function as required. */
7140 s390_chunkify_finish (struct constant_pool *pool_list)
7142 struct constant_pool *curr_pool = NULL;
7146 /* Replace all literal pool references. */
7148 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7151 replace_ltrel_base (&PATTERN (insn));
7153 curr_pool = s390_find_pool (pool_list, insn);
7157 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7159 rtx addr, pool_ref = NULL_RTX;
7160 find_constant_pool_ref (PATTERN (insn), &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (curr_pool, insn);
	      else
		addr = s390_find_constant (curr_pool,
					   get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
7176 /* Dump out all literal pools. */
7178 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7179 s390_dump_pool (curr_pool, 0);
7181 /* Free pool list. */
7185 struct constant_pool *next = pool_list->next;
7186 s390_free_pool (pool_list);
7191 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7192 We have decided we cannot use this list, so revert all changes
7193 to the current function that were done by s390_chunkify_start. */
7196 s390_chunkify_cancel (struct constant_pool *pool_list)
7198 struct constant_pool *curr_pool = NULL;
7201 /* Remove all pool placeholder insns. */
7203 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7205 /* Did we insert an extra barrier? Remove it. */
7206 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
7207 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
7208 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
7210 if (jump && JUMP_P (jump)
7211 && barrier && BARRIER_P (barrier)
7212 && label && LABEL_P (label)
7213 && GET_CODE (PATTERN (jump)) == SET
7214 && SET_DEST (PATTERN (jump)) == pc_rtx
7215 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7216 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7219 remove_insn (barrier);
7220 remove_insn (label);
7223 remove_insn (curr_pool->pool_insn);
7226 /* Remove all base register reload insns. */
7228 for (insn = get_insns (); insn; )
7230 rtx_insn *next_insn = NEXT_INSN (insn);
7232 if (NONJUMP_INSN_P (insn)
7233 && GET_CODE (PATTERN (insn)) == SET
7234 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7235 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7241 /* Free pool list. */
7245 struct constant_pool *next = pool_list->next;
7246 s390_free_pool (pool_list);
7251 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7254 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7258 switch (GET_MODE_CLASS (mode))
7261 case MODE_DECIMAL_FLOAT:
7262 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7264 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7265 assemble_real (r, mode, align);
7269 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7270 mark_symbol_refs_as_used (exp);
7279 /* Return an RTL expression representing the value of the return address
7280 for the frame COUNT steps up from the current frame. FRAME is the
7281 frame pointer of that frame. */
7284 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7289 /* Without backchain, we fail for all but the current frame. */
  if (!TARGET_BACKCHAIN && count > 0)
    return NULL_RTX;
7294 /* For the current frame, we need to make sure the initial
7295 value of RETURN_REGNUM is actually saved. */
  if (count == 0)
    {
      /* On non-z architectures branch splitting could overwrite r14.  */
      if (TARGET_CPU_ZARCH)
	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
      else
	{
	  cfun_frame_layout.save_return_addr_p = true;
	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
	}
    }
7309 if (TARGET_PACKED_STACK)
7310 offset = -2 * UNITS_PER_LONG;
7312 offset = RETURN_REGNUM * UNITS_PER_LONG;
7314 addr = plus_constant (Pmode, frame, offset);
7315 addr = memory_address (Pmode, addr);
7316 return gen_rtx_MEM (Pmode, addr);
7319 /* Return an RTL expression representing the back chain stored in
7320 the current stack frame. */
7323 s390_back_chain_rtx (void)
7327 gcc_assert (TARGET_BACKCHAIN);
7329 if (TARGET_PACKED_STACK)
7330 chain = plus_constant (Pmode, stack_pointer_rtx,
7331 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7333 chain = stack_pointer_rtx;
  chain = gen_rtx_MEM (Pmode, chain);
  return chain;
7339 /* Find first call clobbered register unused in a function.
7340 This could be used as base register in a leaf function
7341 or for holding the return address before epilogue. */
7344 find_unused_clobbered_reg (void)
7347 for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return -1;
7354 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7355 clobbered hard regs in SETREG. */
7358 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7360 char *regs_ever_clobbered = (char *)data;
7361 unsigned int i, regno;
7362 enum machine_mode mode = GET_MODE (setreg);
7364 if (GET_CODE (setreg) == SUBREG)
7366 rtx inner = SUBREG_REG (setreg);
7367 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
7369 regno = subreg_regno (setreg);
7371 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
7372 regno = REGNO (setreg);
  for (i = regno;
       i < regno + HARD_REGNO_NREGS (regno, mode);
       i++)
    regs_ever_clobbered[i] = 1;
7382 /* Walks through all basic blocks of the current function looking
7383 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7384 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7385 each of those regs. */
7388 s390_regs_ever_clobbered (char regs_ever_clobbered[])
7394 memset (regs_ever_clobbered, 0, 32);
  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    for (i = 0; i < 32; i++)
      regs_ever_clobbered[i] = call_really_used_regs[i];
7404 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7405 this work is done by liveness analysis (mark_regs_live_at_end).
7406 Special care is needed for functions containing landing pads. Landing pads
7407 may use the eh registers, but the code which sets these registers is not
7408 contained in that function. Hence s390_regs_ever_clobbered is not able to
7409 deal with this automatically. */
7410 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7411 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7412 if (crtl->calls_eh_return
7413 || (cfun->machine->has_landing_pad_p
7414 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7415 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7417 /* For nonlocal gotos all call-saved registers have to be saved.
7418 This flag is also set for the unwinding code in libgcc.
7419 See expand_builtin_unwind_init. For regs_ever_live this is done by
7421 if (crtl->saves_all_registers)
7422 for (i = 0; i < 32; i++)
7423 if (!call_really_used_regs[i])
7424 regs_ever_clobbered[i] = 1;
7426 FOR_EACH_BB_FN (cur_bb, cfun)
7428 FOR_BB_INSNS (cur_bb, cur_insn)
	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;
		  /* l / lg  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}
	      /* lm / lmg */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (pat,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
7466 /* Determine the frame area which actually has to be accessed
7467 in the function epilogue. The values are stored at the
7468 given pointers AREA_BOTTOM (address of the lowest used stack
7469 address) and AREA_TOP (address of the first item which does
7470 not belong to the stack frame). */
7473 s390_frame_area (int *area_bottom, int *area_top)
7480 if (cfun_frame_layout.first_restore_gpr != -1)
7482 b = (cfun_frame_layout.gprs_offset
7483 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7484 t = b + (cfun_frame_layout.last_restore_gpr
7485 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7488 if (TARGET_64BIT && cfun_save_high_fprs_p)
7490 b = MIN (b, cfun_frame_layout.f8_offset);
7491 t = MAX (t, (cfun_frame_layout.f8_offset
7492 + cfun_frame_layout.high_fprs * 8));
7497 if (cfun_fpr_save_p (FPR4_REGNUM))
7499 b = MIN (b, cfun_frame_layout.f4_offset);
7500 t = MAX (t, cfun_frame_layout.f4_offset + 8);
7502 if (cfun_fpr_save_p (FPR6_REGNUM))
7504 b = MIN (b, cfun_frame_layout.f4_offset + 8);
7505 t = MAX (t, cfun_frame_layout.f4_offset + 16);
7511 /* Update gpr_save_slots in the frame layout trying to make use of
7512 FPRs as GPR save slots.
7513 This is a helper routine of s390_register_info. */
7516 s390_register_info_gprtofpr ()
7518 int save_reg_slot = FPR0_REGNUM;
7521 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
7524 for (i = 15; i >= 6; i--)
      if (cfun_gpr_save_slot (i) == 0)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_really_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;

      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = -1;
	  break;
	}

      cfun_gpr_save_slot (i) = save_reg_slot++;
7550 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
7552 This is a helper routine for s390_register_info. */
7555 s390_register_info_stdarg_fpr ()
7561 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
7562 f0-f4 for 64 bit. */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;
7569 min_fpr = crtl->args.info.fprs;
7570 max_fpr = min_fpr + cfun->va_list_fpr_size;
7571 if (max_fpr > FP_ARG_NUM_REG)
7572 max_fpr = FP_ARG_NUM_REG;
7574 for (i = min_fpr; i < max_fpr; i++)
7575 cfun_set_fpr_save (i + FPR0_REGNUM);
7578 /* Reserve the GPR save slots for GPRs which need to be saved due to
7580 This is a helper routine for s390_register_info. */
7583 s390_register_info_stdarg_gpr ()
  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;
7594 min_gpr = crtl->args.info.gprs;
7595 max_gpr = min_gpr + cfun->va_list_gpr_size;
7596 if (max_gpr > GP_ARG_NUM_REG)
7597 max_gpr = GP_ARG_NUM_REG;
7599 for (i = min_gpr; i < max_gpr; i++)
7600 cfun_gpr_save_slot (2 + i) = -1;
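
/* A note on the cfun_gpr_save_slot encoding used by the routines in
   this file (inferred from their checks, not stated anywhere
   centrally): 0 means the GPR is not saved at all, -1 requests a save
   slot in the stack frame, and an FPR register number means the GPR
   is parked in that FPR via ldgr/lgdr.  */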
7603 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
7604 for registers which need to be saved in function prologue.
7605 This function can be used until the insns emitted for save/restore
7606 of the regs are visible in the RTL stream. */
7609 s390_register_info ()
7612 char clobbered_regs[32];
7614 gcc_assert (!epilogue_completed);
7616 if (reload_completed)
7617 /* After reload we rely on our own routine to determine which
7618 registers need saving. */
7619 s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);
7627 for (i = 0; i < 32; i++)
7628 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7630 /* Mark the call-saved FPRs which need to be saved.
7631 This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
7634 cfun_frame_layout.fpr_bitmap = 0;
7635 cfun_frame_layout.high_fprs = 0;
7636 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
7637 if (clobbered_regs[i] && !call_really_used_regs[i])
7639 cfun_set_fpr_save (i);
7640 if (i >= FPR8_REGNUM)
7641 cfun_frame_layout.high_fprs++;
  if (flag_pic)
    clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
      |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7648 clobbered_regs[BASE_REGNUM]
7649 |= (cfun->machine->base_reg
7650 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7652 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
7653 |= !!frame_pointer_needed;
  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun->machine->split_branches_pending_p
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);
7675 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
7677 for (i = 6; i < 16; i++)
7678 if (clobbered_regs[i])
7679 cfun_gpr_save_slot (i) = -1;
7681 s390_register_info_stdarg_fpr ();
7682 s390_register_info_gprtofpr ();
7684 /* First find the range of GPRs to be restored. Vararg regs don't
7685 need to be restored so we do it before assigning slots to the
7687 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7688 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7689 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7690 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7692 /* stdarg functions might need to save GPRs 2 to 6. This might
7693 override the GPR->FPR save decision made above for r6 since
7694 vararg regs must go to the stack. */
7695 s390_register_info_stdarg_gpr ();
7697 /* Now the range of GPRs which need saving. */
7698 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7699 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7700 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7701 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7704 /* This function is called by s390_optimize_prologue in order to get
7705 rid of unnecessary GPR save/restore instructions. The register info
7706 for the GPRs is re-computed and the ranges are re-calculated. */
7709 s390_optimize_register_info ()
7711 char clobbered_regs[32];
7714 gcc_assert (epilogue_completed);
7715 gcc_assert (!cfun->machine->split_branches_pending_p);
7717 s390_regs_ever_clobbered (clobbered_regs);
7719 for (i = 0; i < 32; i++)
7720 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7722 /* There is still special treatment needed for cases invisible to
7723 s390_regs_ever_clobbered. */
7724 clobbered_regs[RETURN_REGNUM]
7725 |= (TARGET_TPF_PROFILING
7726 /* When expanding builtin_return_addr in ESA mode we do not
7727 know whether r14 will later be needed as scratch reg when
7728 doing branch splitting. So the builtin always accesses the
7729 r14 save slot and we need to stick to the save/restore
	 decision for r14 even if it turns out that it didn't get
	 used.  */
7732 || cfun_frame_layout.save_return_addr_p
7733 || crtl->calls_eh_return);
7735 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
7737 for (i = 6; i < 16; i++)
7738 if (!clobbered_regs[i])
7739 cfun_gpr_save_slot (i) = 0;
7741 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7742 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7743 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7744 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7746 s390_register_info_stdarg_gpr ();
7748 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7749 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7750 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7751 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7754 /* Fill cfun->machine with info about frame of current function. */
7757 s390_frame_info (void)
7759 HOST_WIDE_INT lowest_offset;
7761 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
7762 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
7764 /* The va_arg builtin uses a constant distance of 16 *
7765 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
7766 pointer. So even if we are going to save the stack pointer in an
7767 FPR we need the stack space in order to keep the offsets
7769 if (cfun->stdarg && cfun_save_arg_fprs_p)
7771 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7773 if (cfun_frame_layout.first_save_gpr_slot == -1)
7774 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
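
      /* Illustrative arithmetic: with the 64-bit UNITS_PER_LONG of 8,
	 va_arg expects the FPR save slots at reg_save_area + 16 * 8 =
	 128 bytes, directly behind the r0..r15 slots.  Padding the GPR
	 slot range up to r15 here keeps that distance fixed even when
	 not all GPRs are actually stored.  */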
7777 cfun_frame_layout.frame_size = get_frame_size ();
7778 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7779 fatal_error ("total size of local variables exceeds architecture limit");
7781 if (!TARGET_PACKED_STACK)
7783 /* Fixed stack layout. */
7784 cfun_frame_layout.backchain_offset = 0;
7785 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7786 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7787 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
7791 else if (TARGET_BACKCHAIN)
7793 /* Kernel stack layout - packed stack, backchain, no float */
7794 gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);
7798 /* The distance between the backchain and the return address
7799 save slot must not change. So we always need a slot for the
7800 stack pointer which resides in between. */
7801 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7803 cfun_frame_layout.gprs_offset
7804 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
7806 /* FPRs will not be saved. Nevertheless pick sane values to
7807 keep area calculations valid. */
7808 cfun_frame_layout.f0_offset =
7809 cfun_frame_layout.f4_offset =
7810 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
7816 /* Packed stack layout without backchain. */
7818 /* With stdarg FPRs need their dedicated slots. */
7819 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
7820 : (cfun_fpr_save_p (FPR4_REGNUM) +
7821 cfun_fpr_save_p (FPR6_REGNUM)));
7822 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
7824 num_fprs = (cfun->stdarg ? 2
7825 : (cfun_fpr_save_p (FPR0_REGNUM)
7826 + cfun_fpr_save_p (FPR2_REGNUM)));
7827 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
7829 cfun_frame_layout.gprs_offset
7830 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7832 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
7833 - cfun_frame_layout.high_fprs * 8);
7836 if (cfun_save_high_fprs_p)
7837 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7842 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
7843 sized area at the bottom of the stack. This is required also for
7844 leaf functions. When GCC generates a local stack reference it
7845 will always add STACK_POINTER_OFFSET to all these references. */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;
7852 /* Calculate the number of bytes we have used in our own register
7853 save area. With the packed stack layout we can re-use the
7854 remaining bytes for normal stack elements. */
  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
7866 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
7871 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7872 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7873 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
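
  /* Worked example (with the usual STACK_BOUNDARY of 64 bits, i.e. 8
     bytes): a raw frame size of 20 becomes (20 + 8 - 1) & ~(8 - 1) = 24,
     keeping every frame 8-byte aligned.  */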
7876 /* Generate frame layout. Fills in register and frame data for the current
7877 function in cfun->machine. This routine can be called multiple times;
7878 it will re-do the complete frame layout every time. */
7881 s390_init_frame_layout (void)
7883 HOST_WIDE_INT frame_size;
7886 gcc_assert (!reload_completed);
7888 /* On S/390 machines, we may need to perform branch splitting, which
7889 will require both base and return address register. We have no
7890 choice but to assume we're going to need them until right at the
7891 end of the machine dependent reorg phase. */
7892 if (!TARGET_CPU_ZARCH)
7893 cfun->machine->split_branches_pending_p = true;
  do
    {
      frame_size = cfun_frame_layout.frame_size;
7899 /* Try to predict whether we'll need the base register. */
7900 base_used = cfun->machine->split_branches_pending_p
7901 || crtl->uses_const_pool
7902 || (!DISP_IN_RANGE (frame_size)
7903 && !CONST_OK_FOR_K (frame_size));
7905 /* Decide which register to use as literal pool base. In small
7906 leaf functions, try to use an unused call-clobbered register
7907 as base register to avoid save/restore overhead. */
      if (!base_used)
	cfun->machine->base_reg = NULL_RTX;
      else if (crtl->is_leaf && !df_regs_ever_live_p (5))
	cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
      else
	cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
7921 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
7922 the TX is nonescaping. A transaction is considered escaping if
7923 there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.
7926 The check so far has some limitations:
7927 - only single tbegin/tend BBs are supported
7928 - the first cond jump after tbegin must separate the CC0 path from ~CC0
7929 - when CC is copied to a GPR and the CC0 check is done with the GPR
7930 this is not supported
7934 s390_optimize_nonescaping_tx (void)
7936 const unsigned int CC0 = 1 << 3;
7937 basic_block tbegin_bb = NULL;
7938 basic_block tend_bb = NULL;
7943 rtx_insn *tbegin_insn = NULL;
7945 if (!cfun->machine->tbegin_p)
7948 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
7950 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
7955 FOR_BB_INSNS (bb, insn)
7957 rtx ite, cc, pat, target;
7958 unsigned HOST_WIDE_INT mask;
7960 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
7963 pat = PATTERN (insn);
7965 if (GET_CODE (pat) == PARALLEL)
7966 pat = XVECEXP (pat, 0, 0);
7968 if (GET_CODE (pat) != SET
7969 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
	    {
	      tbegin_insn = insn;

	      /* Just return if the tbegin doesn't have clobbers.  */
	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
		return;
7982 if (tbegin_bb != NULL)
7985 /* Find the next conditional jump. */
7986 for (tmp = NEXT_INSN (insn);
7988 tmp = NEXT_INSN (tmp))
7990 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
7995 ite = SET_SRC (PATTERN (tmp));
7996 if (GET_CODE (ite) != IF_THEN_ELSE)
7999 cc = XEXP (XEXP (ite, 0), 0);
8000 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
8001 || GET_MODE (cc) != CCRAWmode
8002 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
8005 if (bb->succs->length () != 2)
8008 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
	  if (GET_CODE (XEXP (ite, 0)) == NE)
	    mask ^= 0xf;

	  if (mask == CC0)
	    target = XEXP (ite, 1);
	  else if (mask == (CC0 ^ 0xf))
	    target = XEXP (ite, 2);
	  else
	    return;
8023 ei = ei_start (bb->succs);
8024 e1 = ei_safe_edge (ei);
8026 e2 = ei_safe_edge (ei);
8028 if (e2->flags & EDGE_FALLTHRU)
8031 e1 = ei_safe_edge (ei);
8034 if (!(e1->flags & EDGE_FALLTHRU))
8037 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
8039 if (tmp == BB_END (bb))
8044 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
8046 if (tend_bb != NULL)
8053 /* Either we successfully remove the FPR clobbers here or we are not
8054 able to do anything for this TX. Both cases don't qualify for
8056 cfun->machine->tbegin_p = false;
8058 if (tbegin_bb == NULL || tend_bb == NULL)
8061 calculate_dominance_info (CDI_POST_DOMINATORS);
8062 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
8063 free_dominance_info (CDI_POST_DOMINATORS);
8068 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
8070 XVECEXP (PATTERN (tbegin_insn), 0, 0),
8071 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
8072 INSN_CODE (tbegin_insn) = -1;
8073 df_insn_rescan (tbegin_insn);
8078 /* Return true if it is legal to put a value with MODE into REGNO. */
8081 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
8083 switch (REGNO_REG_CLASS (regno))
8086 if (REGNO_PAIR_OK (regno, mode))
8088 if (mode == SImode || mode == DImode)
8091 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
8096 if (FRAME_REGNO_P (regno) && mode == Pmode)
8101 if (REGNO_PAIR_OK (regno, mode))
8104 || (mode != TFmode && mode != TCmode && mode != TDmode))
8109 if (GET_MODE_CLASS (mode) == MODE_CC)
8113 if (REGNO_PAIR_OK (regno, mode))
8115 if (mode == SImode || mode == Pmode)
8126 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
8129 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
8131 /* Once we've decided upon a register to use as base register, it must
8132 no longer be used for any other purpose. */
8133 if (cfun->machine->base_reg)
8134 if (REGNO (cfun->machine->base_reg) == old_reg
8135 || REGNO (cfun->machine->base_reg) == new_reg)
8138 /* Prevent regrename from using call-saved regs which haven't
8139 actually been saved. This is necessary since regrename assumes
8140 the backend save/restore decisions are based on
8141 df_regs_ever_live. Since we have our own routine we have to tell
8142 regrename manually about it. */
8143 if (GENERAL_REGNO_P (new_reg)
8144 && !call_really_used_regs[new_reg]
8145 && cfun_gpr_save_slot (new_reg) == 0)
8151 /* Return nonzero if register REGNO can be used as a scratch register
8155 s390_hard_regno_scratch_ok (unsigned int regno)
8157 /* See s390_hard_regno_rename_ok. */
8158 if (GENERAL_REGNO_P (regno)
8159 && !call_really_used_regs[regno]
8160 && cfun_gpr_save_slot (regno) == 0)
8166 /* Maximum number of registers to represent a value of mode MODE
8167 in a register of class RCLASS. */
8170 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
8175 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8176 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
8178 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
8180 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
8184 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
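
/* Worked example: a 16-byte TFmode value placed in floating-point
   registers needs (16 + 8 - 1) / 8 = 2 of them, which is why TFmode
   always occupies an FPR pair on this target.  */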
8187 /* Return true if we use LRA instead of reload pass. */
8191 return s390_lra_flag;
8194 /* Return true if register FROM can be eliminated via register TO. */
8197 s390_can_eliminate (const int from, const int to)
8199 /* On zSeries machines, we have not marked the base register as fixed.
8200 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
8201 If a function requires the base register, we say here that this
8202 elimination cannot be performed. This will cause reload to free
8203 up the base register (as if it were fixed). On the other hand,
8204 if the current function does *not* require the base register, we
8205 say here the elimination succeeds, which in turn allows reload
8206 to allocate the base register for any other purpose. */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      if (TARGET_CPU_ZARCH)
	{
	  s390_init_frame_layout ();
	  return cfun->machine->base_reg == NULL_RTX;
	}
      return false;
    }
8218 /* Everything else must point into the stack frame. */
8219 gcc_assert (to == STACK_POINTER_REGNUM
8220 || to == HARD_FRAME_POINTER_REGNUM);
8222 gcc_assert (from == FRAME_POINTER_REGNUM
8223 || from == ARG_POINTER_REGNUM
8224 || from == RETURN_ADDRESS_POINTER_REGNUM);
8226 /* Make sure we actually saved the return address. */
8227 if (from == RETURN_ADDRESS_POINTER_REGNUM)
8228 if (!crtl->calls_eh_return
	&& !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
8236 /* Return offset between register FROM and TO initially after prolog. */
8239 s390_initial_elimination_offset (int from, int to)
8241 HOST_WIDE_INT offset;
8243 /* ??? Why are we called for non-eliminable pairs? */
8244 if (!s390_can_eliminate (from, to))
8249 case FRAME_POINTER_REGNUM:
8250 offset = (get_frame_size()
8251 + STACK_POINTER_OFFSET
8252 + crtl->outgoing_args_size);
8255 case ARG_POINTER_REGNUM:
8256 s390_init_frame_layout ();
8257 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
8260 case RETURN_ADDRESS_POINTER_REGNUM:
8261 s390_init_frame_layout ();
      if (cfun_frame_layout.first_save_gpr_slot == -1)
	{
	  /* If it turns out that for stdarg nothing went into the reg
	     save area we also do not need the return address
	     pointer.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    return 0;

	  gcc_unreachable ();
	}
8274 /* In order to make the following work it is not necessary for
8275 r14 to have a save slot. It is sufficient if one other GPR
8276 got one. Since the GPRs are always stored without gaps we
	 are able to calculate where the r14 save slot would
	 reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset
		+ (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot)
		* UNITS_PER_LONG);
      break;
8295 /* Emit insn to save fpr REGNUM at offset OFFSET relative
8296 to register BASE. Return generated insn. */
8299 save_fpr (rtx base, int offset, int regnum)
8302 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8304 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
8305 set_mem_alias_set (addr, get_varargs_alias_set ());
8307 set_mem_alias_set (addr, get_frame_alias_set ());
8309 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
8312 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
8313 to register BASE. Return generated insn. */
8316 restore_fpr (rtx base, int offset, int regnum)
8319 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8320 set_mem_alias_set (addr, get_frame_alias_set ());
8322 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
8325 /* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored anyway.  */
8329 global_not_special_regno_p (int regno)
8331 return (global_regs[regno]
8332 /* These registers are special and need to be
8333 restored in any case. */
8334 && !(regno == STACK_POINTER_REGNUM
8335 || regno == RETURN_REGNUM
8336 || regno == BASE_REGNUM
8337 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8340 /* Generate insn to save registers FIRST to LAST into
8341 the register save area located at offset OFFSET
8342 relative to register BASE. */
8345 save_gprs (rtx base, int offset, int first, int last)
8347 rtx addr, insn, note;
8350 addr = plus_constant (Pmode, base, offset);
8351 addr = gen_rtx_MEM (Pmode, addr);
8353 set_mem_alias_set (addr, get_frame_alias_set ());
8355 /* Special-case single register. */
8359 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8361 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8363 if (!global_not_special_regno_p (first))
8364 RTX_FRAME_RELATED_P (insn) = 1;
8369 insn = gen_store_multiple (addr,
8370 gen_rtx_REG (Pmode, first),
8371 GEN_INT (last - first + 1));
8373 if (first <= 6 && cfun->stdarg)
8374 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
	  rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

	  if (first + i <= 6)
	    set_mem_alias_set (mem, get_varargs_alias_set ());
8382 /* We need to set the FRAME_RELATED flag on all SETs
8383 inside the store-multiple pattern.
8385 However, we must not emit DWARF records for registers 2..5
8386 if they are stored for use by variable arguments ...
	 ??? Unfortunately, it is not enough to simply not set the
	 FRAME_RELATED flags for those SETs, because the first SET
8390 of the PARALLEL is always treated as if it had the flag
8391 set, even if it does not. Therefore we emit a new pattern
8392 without those registers as REG_FRAME_RELATED_EXPR note. */
8394 if (first >= 6 && !global_not_special_regno_p (first))
8396 rtx pat = PATTERN (insn);
8398 for (i = 0; i < XVECLEN (pat, 0); i++)
8399 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8400 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8402 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8404 RTX_FRAME_RELATED_P (insn) = 1;
8410 for (start = first >= 6 ? first : 6; start <= last; start++)
8411 if (!global_not_special_regno_p (start))
8417 addr = plus_constant (Pmode, base,
8418 offset + (start - first) * UNITS_PER_LONG);
8423 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
8424 gen_rtx_REG (Pmode, start));
8426 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
8427 gen_rtx_REG (Pmode, start));
8428 note = PATTERN (note);
8430 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8431 RTX_FRAME_RELATED_P (insn) = 1;
8436 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8437 gen_rtx_REG (Pmode, start),
8438 GEN_INT (last - start + 1));
8439 note = PATTERN (note);
8441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8443 for (i = 0; i < XVECLEN (note, 0); i++)
8444 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8445 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8447 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8449 RTX_FRAME_RELATED_P (insn) = 1;
8455 /* Generate insn to restore registers FIRST to LAST from
8456 the register save area located at offset OFFSET
8457 relative to register BASE. */
8460 restore_gprs (rtx base, int offset, int first, int last)
8464 addr = plus_constant (Pmode, base, offset);
8465 addr = gen_rtx_MEM (Pmode, addr);
8466 set_mem_alias_set (addr, get_frame_alias_set ());
8468 /* Special-case single register. */
8472 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8474 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8476 RTX_FRAME_RELATED_P (insn) = 1;
8480 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8482 GEN_INT (last - first + 1));
8483 RTX_FRAME_RELATED_P (insn) = 1;
8487 /* Return insn sequence to load the GOT register. */
8489 static GTY(()) rtx got_symbol;
8491 s390_load_got (void)
8495 /* We cannot use pic_offset_table_rtx here since we use this
8496 function also for non-pic if __tls_get_offset is called and in
8497 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
8499 rtx got_rtx = gen_rtx_REG (Pmode, 12);
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }
8509 if (TARGET_CPU_ZARCH)
8511 emit_move_insn (got_rtx, got_symbol);
8517 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8518 UNSPEC_LTREL_OFFSET);
8519 offset = gen_rtx_CONST (Pmode, offset);
8520 offset = force_const_mem (Pmode, offset);
8522 emit_move_insn (got_rtx, offset);
      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
			       UNSPEC_LTREL_BASE);
8526 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8528 emit_move_insn (got_rtx, offset);
8531 insns = get_insns ();
8536 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8537 and the change to the stack pointer. */
8540 s390_emit_stack_tie (void)
8542 rtx mem = gen_frame_mem (BLKmode,
8543 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8545 emit_insn (gen_stack_tie (mem));
8548 /* Copy GPRS into FPR save slots. */
8551 s390_save_gprs_to_fprs (void)
8555 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8558 for (i = 6; i < 16; i++)
8560 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8563 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
8564 gen_rtx_REG (DImode, i));
8565 RTX_FRAME_RELATED_P (insn) = 1;
8570 /* Restore GPRs from FPR save slots. */
8573 s390_restore_gprs_from_fprs (void)
8577 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8580 for (i = 6; i < 16; i++)
8582 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8585 emit_move_insn (gen_rtx_REG (DImode, i),
8586 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
8587 df_set_regs_ever_live (i, true);
8588 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
8589 if (i == STACK_POINTER_REGNUM)
8590 add_reg_note (insn, REG_CFA_DEF_CFA,
8591 plus_constant (Pmode, stack_pointer_rtx,
8592 STACK_POINTER_OFFSET));
8593 RTX_FRAME_RELATED_P (insn) = 1;
8599 /* A pass run immediately before shrink-wrapping and prologue and epilogue
8604 const pass_data pass_data_s390_early_mach =
8606 RTL_PASS, /* type */
8607 "early_mach", /* name */
8608 OPTGROUP_NONE, /* optinfo_flags */
8609 TV_MACH_DEP, /* tv_id */
8610 0, /* properties_required */
8611 0, /* properties_provided */
8612 0, /* properties_destroyed */
8613 0, /* todo_flags_start */
8614 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
8617 class pass_s390_early_mach : public rtl_opt_pass
8620 pass_s390_early_mach (gcc::context *ctxt)
8621 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
8624 /* opt_pass methods: */
8625 virtual unsigned int execute (function *);
8627 }; // class pass_s390_early_mach
8630 pass_s390_early_mach::execute (function *fun)
8634 /* Try to get rid of the FPR clobbers. */
8635 s390_optimize_nonescaping_tx ();
8637 /* Re-compute register info. */
8638 s390_register_info ();
8640 /* If we're using a base register, ensure that it is always valid for
8641 the first non-prologue instruction. */
8642 if (fun->machine->base_reg)
8643 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
8645 /* Annotate all constant pool references to let the scheduler know
8646 they implicitly use the base register. */
8647 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8650 annotate_constant_pool_refs (&PATTERN (insn));
8651 df_insn_rescan (insn);
8658 /* Expand the prologue into a bunch of separate insns. */
8661 s390_emit_prologue (void)
8669 /* Choose best register to use for temp use within prologue.
8670 See below for why TPF must use the register 1. */
8672 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8674 && !TARGET_TPF_PROFILING)
8675 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8677 temp_reg = gen_rtx_REG (Pmode, 1);
8679 s390_save_gprs_to_fprs ();
8681 /* Save call saved gprs. */
8682 if (cfun_frame_layout.first_save_gpr != -1)
8684 insn = save_gprs (stack_pointer_rtx,
8685 cfun_frame_layout.gprs_offset +
8686 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8687 - cfun_frame_layout.first_save_gpr_slot),
8688 cfun_frame_layout.first_save_gpr,
8689 cfun_frame_layout.last_save_gpr);
8693 /* Dummy insn to mark literal pool slot. */
8695 if (cfun->machine->base_reg)
8696 emit_insn (gen_main_pool (cfun->machine->base_reg));
8698 offset = cfun_frame_layout.f0_offset;
8700 /* Save f0 and f2. */
8701 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
8703 if (cfun_fpr_save_p (i))
8705 save_fpr (stack_pointer_rtx, offset, i);
8708 else if (!TARGET_PACKED_STACK || cfun->stdarg)
8712 /* Save f4 and f6. */
8713 offset = cfun_frame_layout.f4_offset;
8714 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
8716 if (cfun_fpr_save_p (i))
8718 insn = save_fpr (stack_pointer_rtx, offset, i);
8721 /* If f4 and f6 are call clobbered they are saved due to
8722 stdargs and therefore are not frame related. */
8723 if (!call_really_used_regs[i])
8724 RTX_FRAME_RELATED_P (insn) = 1;
8726 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
8730 if (TARGET_PACKED_STACK
8731 && cfun_save_high_fprs_p
8732 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8734 offset = (cfun_frame_layout.f8_offset
8735 + (cfun_frame_layout.high_fprs - 1) * 8);
8737 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
8738 if (cfun_fpr_save_p (i))
8740 insn = save_fpr (stack_pointer_rtx, offset, i);
8742 RTX_FRAME_RELATED_P (insn) = 1;
8745 if (offset >= cfun_frame_layout.f8_offset)
8749 if (!TARGET_PACKED_STACK)
8750 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
8752 if (flag_stack_usage_info)
8753 current_function_static_stack_size = cfun_frame_layout.frame_size;
8755 /* Decrement stack pointer. */
8757 if (cfun_frame_layout.frame_size > 0)
8759 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8762 if (s390_stack_size)
8764 HOST_WIDE_INT stack_guard;
8766 if (s390_stack_guard)
8767 stack_guard = s390_stack_guard;
	  else
	    {
	      /* If no value for stack guard is provided the smallest
		 power of 2 larger than the current frame size is
		 chosen.  */
	      stack_guard = 1;
	      while (stack_guard < cfun_frame_layout.frame_size)
		stack_guard <<= 1;
	    }
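
	  /* Illustrative arithmetic: for a frame size of 5000 bytes and
	     no -mstack-guard option the loop above stops at 8192, the
	     smallest power of 2 larger than the frame size.  */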
8777 if (cfun_frame_layout.frame_size >= s390_stack_size)
	      warning (0, "frame size of function %qs is %wd"
		       " bytes exceeding user provided stack limit of "
		       "%d bytes.  "
		       "An unconditional trap is added.",
		       current_function_name(), cfun_frame_layout.frame_size,
		       s390_stack_size);
	      emit_insn (gen_trap ());
8789 /* stack_guard has to be smaller than s390_stack_size.
8790 Otherwise we would emit an AND with zero which would
8791 not match the test under mask pattern. */
8792 if (stack_guard >= s390_stack_size)
8794 warning (0, "frame size of function %qs is %wd"
8795 " bytes which is more than half the stack size. "
8796 "The dynamic check would not be reliable. "
8797 "No check emitted for this function.",
8798 current_function_name(),
8799 cfun_frame_layout.frame_size);
8803 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8804 & ~(stack_guard - 1));
8806 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8807 GEN_INT (stack_check_mask));
8809 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8811 t, const0_rtx, const0_rtx));
8813 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8815 t, const0_rtx, const0_rtx));
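
	  /* Worked example (illustrative numbers): with -mstack-size=65536
	     and a stack guard of 4096, stack_check_mask is
	     (65536 - 1) & ~(4096 - 1) = 0xf000.  ANDing the stack pointer
	     with this mask extracts bits 12..15, which are all zero
	     exactly when the stack pointer modulo the 64k stack size has
	     entered the lowest 4k guard area, making the EQ comparison
	     above raise the trap.  */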
8820 if (s390_warn_framesize > 0
8821 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8822 warning (0, "frame size of %qs is %wd bytes",
8823 current_function_name (), cfun_frame_layout.frame_size);
8825 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8826 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8828 /* Save incoming stack pointer into temp reg. */
8829 if (TARGET_BACKCHAIN || next_fpr)
8830 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8832 /* Subtract frame size from stack pointer. */
8834 if (DISP_IN_RANGE (INTVAL (frame_off)))
8836 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8837 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8839 insn = emit_insn (insn);
8843 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8844 frame_off = force_const_mem (Pmode, frame_off);
8846 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8847 annotate_constant_pool_refs (&PATTERN (insn));
8850 RTX_FRAME_RELATED_P (insn) = 1;
8851 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8852 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8853 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8854 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8857 /* Set backchain. */
8859 if (TARGET_BACKCHAIN)
8861 if (cfun_frame_layout.backchain_offset)
8862 addr = gen_rtx_MEM (Pmode,
8863 plus_constant (Pmode, stack_pointer_rtx,
8864 cfun_frame_layout.backchain_offset));
8866 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8867 set_mem_alias_set (addr, get_frame_alias_set ());
8868 insn = emit_insn (gen_move_insn (addr, temp_reg));
8871 /* If we support non-call exceptions (e.g. for Java),
8872 we need to make sure the backchain pointer is set up
8873 before any possibly trapping memory access. */
8874 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8876 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8877 emit_clobber (addr);
8881 /* Save fprs 8 - 15 (64 bit ABI). */
8883 if (cfun_save_high_fprs_p && next_fpr)
8885 /* If the stack might be accessed through a different register
8886 we have to make sure that the stack pointer decrement is not
8887 moved below the use of the stack slots. */
8888 s390_emit_stack_tie ();
8890 insn = emit_insn (gen_add2_insn (temp_reg,
8891 GEN_INT (cfun_frame_layout.f8_offset)));
8895 for (i = FPR8_REGNUM; i <= next_fpr; i++)
8896 if (cfun_fpr_save_p (i))
8898 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8899 cfun_frame_layout.frame_size
8900 + cfun_frame_layout.f8_offset
8903 insn = save_fpr (temp_reg, offset, i);
8905 RTX_FRAME_RELATED_P (insn) = 1;
8906 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8907 gen_rtx_SET (VOIDmode,
8908 gen_rtx_MEM (DFmode, addr),
8909 gen_rtx_REG (DFmode, i)));
8913 /* Set frame pointer, if needed. */
8915 if (frame_pointer_needed)
8917 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8918 RTX_FRAME_RELATED_P (insn) = 1;
8921 /* Set up got pointer, if needed. */
8923 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8925 rtx_insn *insns = s390_load_got ();
8927 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
8928 annotate_constant_pool_refs (&PATTERN (insn));
8933 if (TARGET_TPF_PROFILING)
8935 /* Generate a BAS instruction to serve as a function
8936 entry intercept to facilitate the use of tracing
8937 algorithms located at the branch target. */
8938 emit_insn (gen_prologue_tpf ());
8940 /* Emit a blockage here so that all code
8941 lies between the profiling mechanisms. */
8942 emit_insn (gen_blockage ());
8946 /* Expand the epilogue into a bunch of separate insns. */
8949 s390_emit_epilogue (bool sibcall)
8951 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8952 int area_bottom, area_top, offset = 0;
8957 if (TARGET_TPF_PROFILING)
8960 /* Generate a BAS instruction to serve as a function
8961 entry intercept to facilitate the use of tracing
8962 algorithms located at the branch target. */
8964 /* Emit a blockage here so that all code
8965 lies between the profiling mechanisms. */
8966 emit_insn (gen_blockage ());
8968 emit_insn (gen_epilogue_tpf ());
8971 /* Check whether to use frame or stack pointer for restore. */
8973 frame_pointer = (frame_pointer_needed
8974 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8976 s390_frame_area (&area_bottom, &area_top);
8978 /* Check whether we can access the register save area.
8979 If not, increment the frame pointer as required. */
8981 if (area_top <= area_bottom)
8983 /* Nothing to restore. */
8985 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8986 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8988 /* Area is in range. */
8989 offset = cfun_frame_layout.frame_size;
8993 rtx insn, frame_off, cfa;
8995 offset = area_bottom < 0 ? -area_bottom : 0;
8996 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8998 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8999 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9000 if (DISP_IN_RANGE (INTVAL (frame_off)))
9002 insn = gen_rtx_SET (VOIDmode, frame_pointer,
9003 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9004 insn = emit_insn (insn);
9008 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
9009 frame_off = force_const_mem (Pmode, frame_off);
9011 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
9012 annotate_constant_pool_refs (&PATTERN (insn));
9014 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
9015 RTX_FRAME_RELATED_P (insn) = 1;
9018 /* Restore call saved fprs. */
9022 if (cfun_save_high_fprs_p)
9024 next_offset = cfun_frame_layout.f8_offset;
9025 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
9027 if (cfun_fpr_save_p (i))
9029 restore_fpr (frame_pointer,
9030 offset + next_offset, i);
9032 = alloc_reg_note (REG_CFA_RESTORE,
9033 gen_rtx_REG (DFmode, i), cfa_restores);
9042 next_offset = cfun_frame_layout.f4_offset;
9044 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9046 if (cfun_fpr_save_p (i))
9048 restore_fpr (frame_pointer,
9049 offset + next_offset, i);
9051 = alloc_reg_note (REG_CFA_RESTORE,
9052 gen_rtx_REG (DFmode, i), cfa_restores);
9055 else if (!TARGET_PACKED_STACK)
9061 /* Return register. */
9063 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9065 /* Restore call saved gprs. */
9067 if (cfun_frame_layout.first_restore_gpr != -1)
      /* Check for global registers and save them
	 to the stack locations from where they get restored.  */
9075 for (i = cfun_frame_layout.first_restore_gpr;
9076 i <= cfun_frame_layout.last_restore_gpr;
9079 if (global_not_special_regno_p (i))
9081 addr = plus_constant (Pmode, frame_pointer,
9082 offset + cfun_frame_layout.gprs_offset
9083 + (i - cfun_frame_layout.first_save_gpr_slot)
9085 addr = gen_rtx_MEM (Pmode, addr);
9086 set_mem_alias_set (addr, get_frame_alias_set ());
9087 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
9091 = alloc_reg_note (REG_CFA_RESTORE,
9092 gen_rtx_REG (Pmode, i), cfa_restores);
      /* Fetch return address from stack before load multiple;
	 this benefits scheduling.
9100 Only do this if we already decided that r14 needs to be
9101 saved to a stack slot. (And not just because r14 happens to
9102 be in between two GPRs which need saving.) Otherwise it
9103 would be difficult to take that decision back in
9104 s390_optimize_prologue. */
9105 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
9107 int return_regnum = find_unused_clobbered_reg();
9110 return_reg = gen_rtx_REG (Pmode, return_regnum);
9112 addr = plus_constant (Pmode, frame_pointer,
9113 offset + cfun_frame_layout.gprs_offset
9115 - cfun_frame_layout.first_save_gpr_slot)
9117 addr = gen_rtx_MEM (Pmode, addr);
9118 set_mem_alias_set (addr, get_frame_alias_set ());
9119 emit_move_insn (return_reg, addr);
9121 /* Once we did that optimization we have to make sure
9122 s390_optimize_prologue does not try to remove the
9123 store of r14 since we will not be able to find the
9124 load issued here. */
9125 cfun_frame_layout.save_return_addr_p = true;
9129 insn = restore_gprs (frame_pointer,
9130 offset + cfun_frame_layout.gprs_offset
9131 + (cfun_frame_layout.first_restore_gpr
9132 - cfun_frame_layout.first_save_gpr_slot)
9134 cfun_frame_layout.first_restore_gpr,
9135 cfun_frame_layout.last_restore_gpr);
9136 insn = emit_insn (insn);
9137 REG_NOTES (insn) = cfa_restores;
9138 add_reg_note (insn, REG_CFA_DEF_CFA,
9139 plus_constant (Pmode, stack_pointer_rtx,
9140 STACK_POINTER_OFFSET));
9141 RTX_FRAME_RELATED_P (insn) = 1;
9144 s390_restore_gprs_from_fprs ();
9149 /* Return to caller. */
9151 p = rtvec_alloc (2);
9153 RTVEC_ELT (p, 0) = ret_rtx;
9154 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
9155 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
9159 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
9162 s300_set_up_by_prologue (hard_reg_set_container *regs)
9164 if (cfun->machine->base_reg
9165 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
9166 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
9169 /* Return true if the function can use simple_return to return outside
9170 of a shrink-wrapped region. At present shrink-wrapping is supported
9174 s390_can_use_simple_return_insn (void)
9179 /* Return true if the epilogue is guaranteed to contain only a return
9180 instruction and if a direct return can therefore be used instead.
9181 One of the main advantages of using direct return instructions
9182 is that we can then use conditional returns. */
9185 s390_can_use_return_insn (void)
9189 if (!reload_completed)
9195 if (TARGET_TPF_PROFILING)
9198 for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i))
      return false;
9202 /* For 31 bit this is not covered by the frame_size check below
9203 since f4, f6 are saved in the register save area without needing
9204 additional stack space. */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;
  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
9216 /* Return the size in bytes of a function argument of
9217 type TYPE and/or mode MODE. At least one of TYPE or
9218 MODE must be specified. */
9221 s390_function_arg_size (enum machine_mode mode, const_tree type)
9224 return int_size_in_bytes (type);
9226 /* No type info available for some library calls ... */
9227 if (mode != BLKmode)
9228 return GET_MODE_SIZE (mode);
  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
9234 /* Return true if a function argument of type TYPE and mode MODE
9235 is to be passed in a floating-point register, if available. */
9238 s390_function_arg_float (enum machine_mode mode, const_tree type)
9240 int size = s390_function_arg_size (mode, type);
9244 /* Soft-float changes the ABI: no floating-point registers are used. */
9245 if (TARGET_SOFT_FLOAT)
9248 /* No type info available for some library calls ... */
9250 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
9252 /* The ABI says that record types with a single member are treated
9253 just like that member would be. */
9254 while (TREE_CODE (type) == RECORD_TYPE)
9256 tree field, single = NULL_TREE;
9258 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (single == NULL_TREE)
	    single = TREE_TYPE (field);
	  else
	    return false;
	}

      if (single == NULL_TREE)
	return false;

      type = single;
    }

  return TREE_CODE (type) == REAL_TYPE;
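
/* For illustration (not part of the ABI text above): under the
   single-member rule a
     struct wrap { double d; };
   is passed exactly as a plain double, i.e. in a floating-point
   register when one is free, whereas a struct with two members falls
   through to the integer conventions.  */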
9278 /* Return true if a function argument of type TYPE and mode MODE
9279 is to be passed in an integer register, or a pair of integer
9280 registers, if available. */
9283 s390_function_arg_integer (enum machine_mode mode, const_tree type)
9285 int size = s390_function_arg_size (mode, type);
9289 /* No type info available for some library calls ... */
9291 return GET_MODE_CLASS (mode) == MODE_INT
9292 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
9294 /* We accept small integral (and similar) types. */
9295 if (INTEGRAL_TYPE_P (type)
9296 || POINTER_TYPE_P (type)
9297 || TREE_CODE (type) == NULLPTR_TYPE
9298 || TREE_CODE (type) == OFFSET_TYPE
9299 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
9302 /* We also accept structs of size 1, 2, 4, 8 that are not
9303 passed in floating-point registers. */
  if (AGGREGATE_TYPE_P (type)
      && exact_log2 (size) >= 0
      && !s390_function_arg_float (mode, type))
    return true;

  return false;
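
/* For example, a 4-byte
     struct pair { short hi, lo; };
   passes the exact_log2 test and is not a lone float, so it travels
   in a general purpose register, while a 6-byte struct fails the test
   and is passed by reference instead (see s390_pass_by_reference
   below).  */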
9312 /* Return 1 if a function argument of type TYPE and mode MODE
9313 is to be passed by reference. The ABI specifies that only
9314 structures of size 1, 2, 4, or 8 bytes are passed by value,
9315 all other structures (and complex numbers) are passed by
9319 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
9320 enum machine_mode mode, const_tree type,
9321 bool named ATTRIBUTE_UNUSED)
9323 int size = s390_function_arg_size (mode, type);
9329 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
9332 if (TREE_CODE (type) == COMPLEX_TYPE
9333 || TREE_CODE (type) == VECTOR_TYPE)
9340 /* Update the data in CUM to advance over an argument of mode MODE and
9341 data type TYPE. (TYPE is null for libcalls where that information
9342 may not be available.). The boolean NAMED specifies whether the
9343 argument is a named argument (as opposed to an unnamed argument
9344 matching an ellipsis). */
9347 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9348 const_tree type, bool named ATTRIBUTE_UNUSED)
9350 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9352 if (s390_function_arg_float (mode, type))
9356 else if (s390_function_arg_integer (mode, type))
9358 int size = s390_function_arg_size (mode, type);
9359 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
9365 /* Define where to put the arguments to a function.
9366 Value is zero to push the argument on the stack,
9367 or a hard register in which to store the argument.
9369 MODE is the argument's machine mode.
9370 TYPE is the data type of the argument (as a tree).
9371 This is null for libcalls where that information may
9373 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9374 the preceding args and about the function being called.
9375 NAMED is nonzero if this argument is a named parameter
9376 (otherwise it is an extra parameter matching an ellipsis).
9378 On S/390, we use general purpose registers 2 through 6 to
9379 pass integer, pointer, and certain structure arguments, and
9380 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
9381 to pass floating point arguments. All remaining arguments
9382 are pushed to the stack. */
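
/* As an illustrative example, for
     void f (int a, double b, int c);
   a is passed in r2, b in f0 and c in r3: the integer and floating
   point argument registers are consumed independently of each
   other.  */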
9385 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9386 const_tree type, bool named ATTRIBUTE_UNUSED)
9388 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9390 if (s390_function_arg_float (mode, type))
9392 if (cum->fprs + 1 > FP_ARG_NUM_REG)
9395 return gen_rtx_REG (mode, cum->fprs + 16);
9397 else if (s390_function_arg_integer (mode, type))
9399 int size = s390_function_arg_size (mode, type);
9400 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9402 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
9404 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
9405 return gen_rtx_REG (mode, cum->gprs + 2);
9406 else if (n_gprs == 2)
9408 rtvec p = rtvec_alloc (2);
9411 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
9414 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
9417 return gen_rtx_PARALLEL (mode, p);
9421 /* After the real arguments, expand_call calls us once again
9422 with a void_type_node type. Whatever we return here is
9423 passed as operand 2 to the call expanders.
9425 We don't need this feature ... */
9426 else if (type == void_type_node)
9432 /* Return true if return values of type TYPE should be returned
9433 in a memory buffer whose address is passed by the caller as
9434 hidden first argument. */
9437 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
9439 /* We accept small integral (and similar) types. */
9440 if (INTEGRAL_TYPE_P (type)
9441 || POINTER_TYPE_P (type)
9442 || TREE_CODE (type) == OFFSET_TYPE
9443 || TREE_CODE (type) == REAL_TYPE)
9444 return int_size_in_bytes (type) > 8;
9446 /* Aggregates and similar constructs are always returned in memory. */
9448 if (AGGREGATE_TYPE_P (type)
9449 || TREE_CODE (type) == COMPLEX_TYPE
9450 || TREE_CODE (type) == VECTOR_TYPE)
9453 /* ??? We get called on all sorts of random stuff from
9454 aggregate_value_p. We can't abort, but it's not clear
9455 what's safe to return. Pretend it's a struct, I guess. */
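/* Illustrative sketch (assumption: only the checks above apply): an
   'int' or 'double' result comes back in a register, while e.g.

     struct pair { int a, b; };   -- aggregate: in memory, despite size 8
     _Complex float c;            -- complex: in memory

   are returned through the hidden buffer whose address the caller
   passes as the first argument. */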
9459 /* Function arguments and return values are promoted to word size. */
9461 static enum machine_mode
9462 s390_promote_function_mode (const_tree type, enum machine_mode mode,
9464 const_tree fntype ATTRIBUTE_UNUSED,
9465 int for_return ATTRIBUTE_UNUSED)
9467 if (INTEGRAL_MODE_P (mode)
9468 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
9470 if (type != NULL_TREE && POINTER_TYPE_P (type))
9471 *punsignedp = POINTERS_EXTEND_UNSIGNED;
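/* Worked example: in 64-bit mode (UNITS_PER_LONG == 8) a 4-byte 'int'
   argument or return value is widened to word size; pointer types are
   treated as unsigned for the widening, per POINTERS_EXTEND_UNSIGNED
   above. */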
9478 /* Define where to return a (scalar) value of type RET_TYPE.
9479 If RET_TYPE is null, define where to return a (scalar)
9480 value of mode MODE from a libcall. */
9483 s390_function_and_libcall_value (enum machine_mode mode,
9484 const_tree ret_type,
9485 const_tree fntype_or_decl,
9486 bool outgoing ATTRIBUTE_UNUSED)
9488 /* For normal functions perform the promotion as
9489 promote_function_mode would do. */
9492 int unsignedp = TYPE_UNSIGNED (ret_type);
9493 mode = promote_function_mode (ret_type, mode, &unsignedp,
9497 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
9498 gcc_assert (GET_MODE_SIZE (mode) <= 8);
9500 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
9501 return gen_rtx_REG (mode, 16);
9502 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
9503 || UNITS_PER_LONG == UNITS_PER_WORD)
9504 return gen_rtx_REG (mode, 2);
9505 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
9507 /* This case is triggered when returning a 64 bit value with
9508 -m31 -mzarch. Although the value would fit into a single
9509 register it has to be forced into a 32 bit register pair in
9510 order to match the ABI. */
9511 rtvec p = rtvec_alloc (2);
9514 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
9516 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
9518 return gen_rtx_PARALLEL (mode, p);
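/* Worked example for the PARALLEL above: a 'long long' returned with
   -m31 -mzarch is split across the pair (r2, r3), with r2 holding the
   word at offset 0 and r3 the word at offset 4, matching the EXPR_LIST
   offsets const0_rtx and GEN_INT (4). */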
9524 /* Define where to return a scalar return value of type RET_TYPE. */
9527 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9530 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9531 fn_decl_or_type, outgoing);
9534 /* Define where to return a scalar libcall return value of mode MODE. */
9538 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9540 return s390_function_and_libcall_value (mode, NULL_TREE,
9545 /* Create and return the va_list datatype.
9547 On S/390, va_list is an array type equivalent to
9549 typedef struct __va_list_tag
9550 {
9551 long __gpr;
9552 long __fpr;
9553 void *__overflow_arg_area;
9554 void *__reg_save_area;
9555 } va_list[1];
9557 where __gpr and __fpr hold the number of general purpose
9558 or floating point arguments used up to now, respectively,
9559 __overflow_arg_area points to the stack location of the
9560 next argument passed on the stack, and __reg_save_area
9561 always points to the start of the register area in the
9562 call frame of the current function. The function prologue
9563 saves all registers used for argument passing into this
9564 area if the function uses variable arguments. */
9567 s390_build_builtin_va_list (void)
9569 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9571 record = lang_hooks.types.make_type (RECORD_TYPE);
9574 build_decl (BUILTINS_LOCATION,
9575 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9577 f_gpr = build_decl (BUILTINS_LOCATION,
9578 FIELD_DECL, get_identifier ("__gpr"),
9579 long_integer_type_node);
9580 f_fpr = build_decl (BUILTINS_LOCATION,
9581 FIELD_DECL, get_identifier ("__fpr"),
9582 long_integer_type_node);
9583 f_ovf = build_decl (BUILTINS_LOCATION,
9584 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9586 f_sav = build_decl (BUILTINS_LOCATION,
9587 FIELD_DECL, get_identifier ("__reg_save_area"),
9590 va_list_gpr_counter_field = f_gpr;
9591 va_list_fpr_counter_field = f_fpr;
9593 DECL_FIELD_CONTEXT (f_gpr) = record;
9594 DECL_FIELD_CONTEXT (f_fpr) = record;
9595 DECL_FIELD_CONTEXT (f_ovf) = record;
9596 DECL_FIELD_CONTEXT (f_sav) = record;
9598 TYPE_STUB_DECL (record) = type_decl;
9599 TYPE_NAME (record) = type_decl;
9600 TYPE_FIELDS (record) = f_gpr;
9601 DECL_CHAIN (f_gpr) = f_fpr;
9602 DECL_CHAIN (f_fpr) = f_ovf;
9603 DECL_CHAIN (f_ovf) = f_sav;
9605 layout_type (record);
9607 /* The correct type is an array type of one element. */
9608 return build_array_type (record, build_index_type (size_zero_node));
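/* User-level sketch of what this type supports (plain ISO C stdarg,
   shown only for orientation; not part of this file):

     #include <stdarg.h>
     int sum (int n, ...)
     {
       va_list ap;          // array of one __va_list_tag on S/390
       int i, s = 0;
       va_start (ap, n);
       for (i = 0; i < n; i++)
         s += va_arg (ap, int);
       va_end (ap);
       return s;
     }
*/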
9611 /* Implement va_start by filling the va_list structure VALIST.
9612 STDARG_P is always true, and ignored.
9613 NEXTARG points to the first anonymous stack argument.
9615 The following global variables are used to initialize
9616 the va_list structure:
9618 crtl->args.info:
9619 holds the number of gprs and fprs used for named arguments.
9620 crtl->args.arg_offset_rtx:
9621 holds the offset of the first anonymous stack argument
9622 (relative to the virtual arg pointer). */
9625 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9627 HOST_WIDE_INT n_gpr, n_fpr;
9629 tree f_gpr, f_fpr, f_ovf, f_sav;
9630 tree gpr, fpr, ovf, sav, t;
9632 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9633 f_fpr = DECL_CHAIN (f_gpr);
9634 f_ovf = DECL_CHAIN (f_fpr);
9635 f_sav = DECL_CHAIN (f_ovf);
9637 valist = build_simple_mem_ref (valist);
9638 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9639 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9640 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9641 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9643 /* Count number of gp and fp argument registers used. */
9645 n_gpr = crtl->args.info.gprs;
9646 n_fpr = crtl->args.info.fprs;
9648 if (cfun->va_list_gpr_size)
9650 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9651 build_int_cst (NULL_TREE, n_gpr));
9652 TREE_SIDE_EFFECTS (t) = 1;
9653 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9656 if (cfun->va_list_fpr_size)
9658 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9659 build_int_cst (NULL_TREE, n_fpr));
9660 TREE_SIDE_EFFECTS (t) = 1;
9661 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9664 /* Find the overflow area. */
9665 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9666 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9668 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9670 off = INTVAL (crtl->args.arg_offset_rtx);
9671 off = off < 0 ? 0 : off;
9672 if (TARGET_DEBUG_ARG)
9673 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9674 (int)n_gpr, (int)n_fpr, off);
9676 t = fold_build_pointer_plus_hwi (t, off);
9678 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9679 TREE_SIDE_EFFECTS (t) = 1;
9680 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9683 /* Find the register save area. */
9684 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9685 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9687 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9688 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9690 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9691 TREE_SIDE_EFFECTS (t) = 1;
9692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
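/* Worked example for the address computed above: assuming the usual
   definitions RETURN_REGNUM == 14 (r14) and UNITS_PER_LONG == 8 in
   64-bit mode, __reg_save_area is set 14 * 8 == 112 bytes below the
   slot that return_address_pointer_rtx designates, i.e. to the start
   of the register save area filled by the prologue. */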
9696 /* Implement va_arg by updating the va_list structure
9697 VALIST as required to retrieve an argument of type
9698 TYPE, and returning that argument.
9700 Generates code equivalent to:
9702 if (integral value) {
9703 if (size <= 4 && args.gpr < 5 ||
9704 size > 4 && args.gpr < 4 )
9705 ret = args.reg_save_area[args.gpr+8]
9706 else
9707 ret = *args.overflow_arg_area++;
9708 } else if (float value) {
9709 if (args.fpr < 2)
9710 ret = args.reg_save_area[args.fpr+64]
9711 else
9712 ret = *args.overflow_arg_area++;
9713 } else if (aggregate value) {
9714 if (args.gpr < 5)
9715 ret = *args.reg_save_area[args.gpr]
9716 else
9717 ret = **args.overflow_arg_area++;
9718 } */
9721 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9722 gimple_seq *post_p ATTRIBUTE_UNUSED)
9724 tree f_gpr, f_fpr, f_ovf, f_sav;
9725 tree gpr, fpr, ovf, sav, reg, t, u;
9726 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9727 tree lab_false, lab_over, addr;
9729 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9730 f_fpr = DECL_CHAIN (f_gpr);
9731 f_ovf = DECL_CHAIN (f_fpr);
9732 f_sav = DECL_CHAIN (f_ovf);
9734 valist = build_va_arg_indirect_ref (valist);
9735 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9736 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9737 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9739 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9740 both appear on a lhs. */
9741 valist = unshare_expr (valist);
9742 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9744 size = int_size_in_bytes (type);
9746 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9748 if (TARGET_DEBUG_ARG)
9750 fprintf (stderr, "va_arg: aggregate type");
9754 /* Aggregates are passed by reference. */
9759 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9760 will be added by s390_frame_info, because for va_args an even
9761 number of GPRs always has to be saved (r15-r2 = 14 regs). */
9762 sav_ofs = 2 * UNITS_PER_LONG;
9763 sav_scale = UNITS_PER_LONG;
9764 size = UNITS_PER_LONG;
9765 max_reg = GP_ARG_NUM_REG - n_reg;
9767 else if (s390_function_arg_float (TYPE_MODE (type), type))
9769 if (TARGET_DEBUG_ARG)
9771 fprintf (stderr, "va_arg: float type");
9775 /* FP args go in FP registers, if present. */
9779 sav_ofs = 16 * UNITS_PER_LONG;
9781 max_reg = FP_ARG_NUM_REG - n_reg;
9785 if (TARGET_DEBUG_ARG)
9787 fprintf (stderr, "va_arg: other type");
9791 /* Otherwise into GP registers. */
9794 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9796 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9797 will be added by s390_frame_info, because for va_args an even
9798 number of GPRs always has to be saved (r15-r2 = 14 regs). */
9799 sav_ofs = 2 * UNITS_PER_LONG;
9801 if (size < UNITS_PER_LONG)
9802 sav_ofs += UNITS_PER_LONG - size;
9804 sav_scale = UNITS_PER_LONG;
9805 max_reg = GP_ARG_NUM_REG - n_reg;
9808 /* Pull the value out of the saved registers ... */
9810 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9811 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9812 addr = create_tmp_var (ptr_type_node, "addr");
9814 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9815 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9816 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9817 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9818 gimplify_and_add (t, pre_p);
9820 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9821 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9822 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9823 t = fold_build_pointer_plus (t, u);
9825 gimplify_assign (addr, t, pre_p);
9827 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9829 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9832 /* ... Otherwise out of the overflow area. */
9835 if (size < UNITS_PER_LONG)
9836 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9838 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9840 gimplify_assign (addr, t, pre_p);
9842 t = fold_build_pointer_plus_hwi (t, size);
9843 gimplify_assign (ovf, t, pre_p);
9845 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9848 /* Increment register save count. */
9850 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9851 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9852 gimplify_and_add (u, pre_p);
9856 t = build_pointer_type_for_mode (build_pointer_type (type),
9858 addr = fold_convert (t, addr);
9859 addr = build_va_arg_indirect_ref (addr);
9863 t = build_pointer_type_for_mode (type, ptr_mode, true);
9864 addr = fold_convert (t, addr);
9867 return build_va_arg_indirect_ref (addr);
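/* Worked example of the max_reg test above: for an 8-byte integer in
   31-bit mode, n_reg == 2 and max_reg == GP_ARG_NUM_REG - 2 == 3
   (assuming GP_ARG_NUM_REG == 5, i.e. r2..r6), so once args.gpr > 3
   the value no longer fits in the remaining register pair and is
   fetched from the overflow area instead. */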
9870 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
9871 expanders.
9872 DEST - Register location where CC will be stored.
9873 TDB - Pointer to a 256 byte area where to store the transaction
9874 diagnostic block. NULL if TDB is not needed.
9875 RETRY - Retry count value. If non-NULL a retry loop for CC2
9876 is generated.
9877 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
9878 of the tbegin instruction pattern. */
9881 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
9883 rtx retry_plus_two = gen_reg_rtx (SImode);
9884 rtx retry_reg = gen_reg_rtx (SImode);
9885 rtx_code_label *retry_label = NULL;
9887 if (retry != NULL_RTX)
9889 emit_move_insn (retry_reg, retry);
9890 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
9891 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
9892 retry_label = gen_label_rtx ();
9893 emit_label (retry_label);
9897 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
9899 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
9902 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
9903 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
9906 if (retry != NULL_RTX)
9908 const int CC0 = 1 << 3;
9909 const int CC1 = 1 << 2;
9910 const int CC3 = 1 << 0;
9912 rtx count = gen_reg_rtx (SImode);
9913 rtx_code_label *leave_label = gen_label_rtx ();
9915 /* Exit for success and permanent failures. */
9916 jump = s390_emit_jump (leave_label,
9917 gen_rtx_EQ (VOIDmode,
9918 gen_rtx_REG (CCRAWmode, CC_REGNUM),
9919 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
9920 LABEL_NUSES (leave_label) = 1;
9922 /* CC2 - transient failure. Perform retry with ppa. */
9923 emit_move_insn (count, retry_plus_two);
9924 emit_insn (gen_subsi3 (count, count, retry_reg));
9925 emit_insn (gen_tx_assist (count));
9926 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
9929 JUMP_LABEL (jump) = retry_label;
9930 LABEL_NUSES (retry_label) = 1;
9931 emit_label (leave_label);
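/* Shape of the RTL emitted for the retry case, as a hedged pseudocode
   sketch of the insns generated above:

     retry_reg      = retry + 1
     retry_plus_two = retry + 2
   retry_label:
     tbegin
     if (cc in {0, 1, 3}) goto leave_label   ; success / permanent failure
     count = retry_plus_two - retry_reg      ; argument for the assist
     ppa (count)                             ; gen_tx_assist
     if (--retry_reg != 0) goto retry_label  ; doloop_si64
   leave_label:
*/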
9939 S390_BUILTIN_TBEGIN,
9940 S390_BUILTIN_TBEGIN_NOFLOAT,
9941 S390_BUILTIN_TBEGIN_RETRY,
9942 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9943 S390_BUILTIN_TBEGINC,
9945 S390_BUILTIN_TABORT,
9946 S390_BUILTIN_NON_TX_STORE,
9947 S390_BUILTIN_TX_NESTING_DEPTH,
9948 S390_BUILTIN_TX_ASSIST,
9953 static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
9955 CODE_FOR_tbegin_nofloat,
9956 CODE_FOR_tbegin_retry,
9957 CODE_FOR_tbegin_retry_nofloat,
9967 s390_init_builtins (void)
9969 tree ftype, uint64_type;
9970 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
9972 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
9974 /* void foo (void) */
9975 ftype = build_function_type_list (void_type_node, NULL_TREE);
9976 add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
9977 BUILT_IN_MD, NULL, NULL_TREE);
9979 /* void foo (int) */
9980 ftype = build_function_type_list (void_type_node, integer_type_node,
9982 add_builtin_function ("__builtin_tabort", ftype,
9983 S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr);
9984 add_builtin_function ("__builtin_tx_assist", ftype,
9985 S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
9987 /* int foo (void *) */
9988 ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
9989 add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
9990 BUILT_IN_MD, NULL, returns_twice_attr);
9991 add_builtin_function ("__builtin_tbegin_nofloat", ftype,
9992 S390_BUILTIN_TBEGIN_NOFLOAT,
9993 BUILT_IN_MD, NULL, returns_twice_attr);
9995 /* int foo (void *, int) */
9996 ftype = build_function_type_list (integer_type_node, ptr_type_node,
9997 integer_type_node, NULL_TREE);
9998 add_builtin_function ("__builtin_tbegin_retry", ftype,
9999 S390_BUILTIN_TBEGIN_RETRY,
10001 NULL, returns_twice_attr);
10002 add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
10003 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
10005 NULL, returns_twice_attr);
10007 /* int foo (void) */
10008 ftype = build_function_type_list (integer_type_node, NULL_TREE);
10009 add_builtin_function ("__builtin_tx_nesting_depth", ftype,
10010 S390_BUILTIN_TX_NESTING_DEPTH,
10011 BUILT_IN_MD, NULL, NULL_TREE);
10012 add_builtin_function ("__builtin_tend", ftype,
10013 S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
10015 /* void foo (uint64_t *, uint64_t) */
10017 uint64_type = long_unsigned_type_node;
10019 uint64_type = long_long_unsigned_type_node;
10021 ftype = build_function_type_list (void_type_node,
10022 build_pointer_type (uint64_type),
10023 uint64_type, NULL_TREE);
10024 add_builtin_function ("__builtin_non_tx_store", ftype,
10025 S390_BUILTIN_NON_TX_STORE,
10026 BUILT_IN_MD, NULL, NULL_TREE);
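/* A minimal usage sketch for the builtins declared above (user code,
   shown for orientation; cc == 0 denoting a successfully started
   transaction follows the TBEGIN condition-code convention):

     int cc = __builtin_tbegin ((void *) 0);   // no diagnostic block
     if (cc == 0)
       {
         counter++;                            // transactional work
         __builtin_tend ();
       }
*/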
10029 /* Expand an expression EXP that calls a built-in function,
10030 with result going to TARGET if that's convenient
10031 (and in mode MODE if that's convenient).
10032 SUBTARGET may be used as the target for computing one of EXP's operands.
10033 IGNORE is nonzero if the value is to be ignored. */
10036 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10037 enum machine_mode mode ATTRIBUTE_UNUSED,
10038 int ignore ATTRIBUTE_UNUSED)
10042 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10043 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10044 enum insn_code icode;
10045 rtx op[MAX_ARGS], pat;
10049 call_expr_arg_iterator iter;
10051 if (fcode >= S390_BUILTIN_max)
10052 internal_error ("bad builtin fcode");
10053 icode = code_for_builtin[fcode];
10054 if (icode == 0)
10055 internal_error ("bad builtin fcode");
10057 if (!TARGET_HTM)
10058 error ("Transactional execution builtins not enabled (-mhtm)\n");
10060 /* Set a flag in the machine specific cfun part in order to support
10061 saving/restoring of FPRs. */
10062 if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
10063 cfun->machine->tbegin_p = true;
10065 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10068 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10070 const struct insn_operand_data *insn_op;
10072 if (arg == error_mark_node)
10074 if (arity >= MAX_ARGS)
10077 insn_op = &insn_data[icode].operand[arity + nonvoid];
10079 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
10081 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
10083 if (insn_op->predicate == memory_operand)
10085 /* Don't move a NULL pointer into a register. Otherwise
10086 we have to rely on combine being able to move it back
10087 in order to get an immediate 0 in the instruction. */
10088 if (op[arity] != const0_rtx)
10089 op[arity] = copy_to_mode_reg (Pmode, op[arity]);
10090 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
10093 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
10101 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10103 || GET_MODE (target) != tmode
10104 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
10105 target = gen_reg_rtx (tmode);
10111 pat = GEN_FCN (icode) (target);
10115 pat = GEN_FCN (icode) (target, op[0]);
10117 pat = GEN_FCN (icode) (op[0]);
10121 pat = GEN_FCN (icode) (target, op[0], op[1]);
10123 pat = GEN_FCN (icode) (op[0], op[1]);
10126 gcc_unreachable ();
10138 /* We call mcount before the function prologue. So a profiled leaf
10139 function should stay a leaf function. */
10142 s390_keep_leaf_when_profiled ()
10147 /* Output assembly code for the trampoline template to
10150 On S/390, we use gpr 1 internally in the trampoline code;
10151 gpr 0 is used to hold the static chain. */
10154 s390_asm_trampoline_template (FILE *file)
10157 op[0] = gen_rtx_REG (Pmode, 0);
10158 op[1] = gen_rtx_REG (Pmode, 1);
10162 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10163 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
10164 output_asm_insn ("br\t%1", op); /* 2 byte */
10165 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
10169 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10170 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
10171 output_asm_insn ("br\t%1", op); /* 2 byte */
10172 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
10176 /* Emit RTL insns to initialize the variable parts of a trampoline.
10177 FNADDR is an RTX for the address of the function's pure code.
10178 CXT is an RTX for the static chain value for the function. */
10181 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10183 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10186 emit_block_move (m_tramp, assemble_trampoline_template (),
10187 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
10189 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
10190 emit_move_insn (mem, cxt);
10191 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
10192 emit_move_insn (mem, fnaddr);
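/* Resulting trampoline layout in 31-bit mode, as a sketch (byte
   offsets, UNITS_PER_LONG == 4):

     0:  basr %r1,0 ; lm %r0,%r1,6(%r1) ; br %r1   -- template code
     8:  static chain value  (2 * UNITS_PER_LONG)
    12:  function address    (3 * UNITS_PER_LONG)

   In 64-bit mode the same structure holds with UNITS_PER_LONG == 8,
   so the chain lands at offset 16 and the address at offset 24. */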
10195 /* Output assembler code to FILE to increment profiler label # LABELNO
10196 for profiling a function entry. */
10199 s390_function_profiler (FILE *file, int labelno)
10204 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
10206 fprintf (file, "# function profiler \n");
10208 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
10209 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
10210 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
10212 op[2] = gen_rtx_REG (Pmode, 1);
10213 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
10214 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
10216 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
10219 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
10220 op[4] = gen_rtx_CONST (Pmode, op[4]);
10225 output_asm_insn ("stg\t%0,%1", op);
10226 output_asm_insn ("larl\t%2,%3", op);
10227 output_asm_insn ("brasl\t%0,%4", op);
10228 output_asm_insn ("lg\t%0,%1", op);
10230 else if (!flag_pic)
10232 op[6] = gen_label_rtx ();
10234 output_asm_insn ("st\t%0,%1", op);
10235 output_asm_insn ("bras\t%2,%l6", op);
10236 output_asm_insn (".long\t%4", op);
10237 output_asm_insn (".long\t%3", op);
10238 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10239 output_asm_insn ("l\t%0,0(%2)", op);
10240 output_asm_insn ("l\t%2,4(%2)", op);
10241 output_asm_insn ("basr\t%0,%0", op);
10242 output_asm_insn ("l\t%0,%1", op);
10246 op[5] = gen_label_rtx ();
10247 op[6] = gen_label_rtx ();
10249 output_asm_insn ("st\t%0,%1", op);
10250 output_asm_insn ("bras\t%2,%l6", op);
10251 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
10252 output_asm_insn (".long\t%4-%l5", op);
10253 output_asm_insn (".long\t%3-%l5", op);
10254 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10255 output_asm_insn ("lr\t%0,%2", op);
10256 output_asm_insn ("a\t%0,0(%2)", op);
10257 output_asm_insn ("a\t%2,4(%2)", op);
10258 output_asm_insn ("basr\t%0,%0", op);
10259 output_asm_insn ("l\t%0,%1", op);
10263 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
10264 into its SYMBOL_REF_FLAGS. */
10267 s390_encode_section_info (tree decl, rtx rtl, int first)
10269 default_encode_section_info (decl, rtl, first);
10271 if (TREE_CODE (decl) == VAR_DECL)
10273 /* If a variable has a forced alignment to < 2 bytes, mark it
10274 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
10275 operand. */
10276 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
10277 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
10278 if (!DECL_SIZE (decl)
10279 || !DECL_ALIGN (decl)
10280 || !tree_fits_shwi_p (DECL_SIZE (decl))
10281 || (DECL_ALIGN (decl) <= 64
10282 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
10283 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10286 /* Literal pool references don't have a decl so they are handled
10287 differently here. We rely on the information in the MEM_ALIGN
10288 entry to decide upon natural alignment. */
10290 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
10291 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
10292 && (MEM_ALIGN (rtl) == 0
10293 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
10294 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
10295 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
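/* Example of a decl that receives SYMBOL_FLAG_ALIGN1 (illustrative
   sketch): for

     static char tag __attribute__ ((aligned (1)));

   DECL_USER_ALIGN is set and DECL_ALIGN is 8 bits < 16, so the symbol
   must never be used as a LARL operand, since LARL can only form even
   addresses. */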
10298 /* Output thunk to FILE that implements a C++ virtual function call (with
10299 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
10300 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
10301 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
10302 relative to the resulting this pointer. */
10305 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10306 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10312 /* Make sure unwind info is emitted for the thunk if needed. */
10313 final_start_function (emit_barrier (), file, 1);
10315 /* Operand 0 is the target function. */
10316 op[0] = XEXP (DECL_RTL (function), 0);
10317 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
10320 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
10321 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
10322 op[0] = gen_rtx_CONST (Pmode, op[0]);
10325 /* Operand 1 is the 'this' pointer. */
10326 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10327 op[1] = gen_rtx_REG (Pmode, 3);
10329 op[1] = gen_rtx_REG (Pmode, 2);
10331 /* Operand 2 is the delta. */
10332 op[2] = GEN_INT (delta);
10334 /* Operand 3 is the vcall_offset. */
10335 op[3] = GEN_INT (vcall_offset);
10337 /* Operand 4 is the temporary register. */
10338 op[4] = gen_rtx_REG (Pmode, 1);
10340 /* Operands 5 to 8 can be used as labels. */
10346 /* Operand 9 can be used for temporary register. */
10349 /* Generate code. */
10352 /* Setup literal pool pointer if required. */
10353 if ((!DISP_IN_RANGE (delta)
10354 && !CONST_OK_FOR_K (delta)
10355 && !CONST_OK_FOR_Os (delta))
10356 || (!DISP_IN_RANGE (vcall_offset)
10357 && !CONST_OK_FOR_K (vcall_offset)
10358 && !CONST_OK_FOR_Os (vcall_offset)))
10360 op[5] = gen_label_rtx ();
10361 output_asm_insn ("larl\t%4,%5", op);
10364 /* Add DELTA to this pointer. */
10367 if (CONST_OK_FOR_J (delta))
10368 output_asm_insn ("la\t%1,%2(%1)", op);
10369 else if (DISP_IN_RANGE (delta))
10370 output_asm_insn ("lay\t%1,%2(%1)", op);
10371 else if (CONST_OK_FOR_K (delta))
10372 output_asm_insn ("aghi\t%1,%2", op);
10373 else if (CONST_OK_FOR_Os (delta))
10374 output_asm_insn ("agfi\t%1,%2", op);
10377 op[6] = gen_label_rtx ();
10378 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
10382 /* Perform vcall adjustment. */
10385 if (DISP_IN_RANGE (vcall_offset))
10387 output_asm_insn ("lg\t%4,0(%1)", op);
10388 output_asm_insn ("ag\t%1,%3(%4)", op);
10390 else if (CONST_OK_FOR_K (vcall_offset))
10392 output_asm_insn ("lghi\t%4,%3", op);
10393 output_asm_insn ("ag\t%4,0(%1)", op);
10394 output_asm_insn ("ag\t%1,0(%4)", op);
10396 else if (CONST_OK_FOR_Os (vcall_offset))
10398 output_asm_insn ("lgfi\t%4,%3", op);
10399 output_asm_insn ("ag\t%4,0(%1)", op);
10400 output_asm_insn ("ag\t%1,0(%4)", op);
10404 op[7] = gen_label_rtx ();
10405 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
10406 output_asm_insn ("ag\t%4,0(%1)", op);
10407 output_asm_insn ("ag\t%1,0(%4)", op);
10411 /* Jump to target. */
10412 output_asm_insn ("jg\t%0", op);
10414 /* Output literal pool if required. */
10417 output_asm_insn (".align\t4", op);
10418 targetm.asm_out.internal_label (file, "L",
10419 CODE_LABEL_NUMBER (op[5]));
10423 targetm.asm_out.internal_label (file, "L",
10424 CODE_LABEL_NUMBER (op[6]));
10425 output_asm_insn (".long\t%2", op);
10429 targetm.asm_out.internal_label (file, "L",
10430 CODE_LABEL_NUMBER (op[7]));
10431 output_asm_insn (".long\t%3", op);
10436 /* Setup base pointer if required. */
10438 || (!DISP_IN_RANGE (delta)
10439 && !CONST_OK_FOR_K (delta)
10440 && !CONST_OK_FOR_Os (delta))
10441 || (!DISP_IN_RANGE (delta)
10442 && !CONST_OK_FOR_K (vcall_offset)
10443 && !CONST_OK_FOR_Os (vcall_offset)))
10445 op[5] = gen_label_rtx ();
10446 output_asm_insn ("basr\t%4,0", op);
10447 targetm.asm_out.internal_label (file, "L",
10448 CODE_LABEL_NUMBER (op[5]));
10451 /* Add DELTA to this pointer. */
10454 if (CONST_OK_FOR_J (delta))
10455 output_asm_insn ("la\t%1,%2(%1)", op);
10456 else if (DISP_IN_RANGE (delta))
10457 output_asm_insn ("lay\t%1,%2(%1)", op);
10458 else if (CONST_OK_FOR_K (delta))
10459 output_asm_insn ("ahi\t%1,%2", op);
10460 else if (CONST_OK_FOR_Os (delta))
10461 output_asm_insn ("afi\t%1,%2", op);
10464 op[6] = gen_label_rtx ();
10465 output_asm_insn ("a\t%1,%6-%5(%4)", op);
10469 /* Perform vcall adjustment. */
10472 if (CONST_OK_FOR_J (vcall_offset))
10474 output_asm_insn ("l\t%4,0(%1)", op);
10475 output_asm_insn ("a\t%1,%3(%4)", op);
10477 else if (DISP_IN_RANGE (vcall_offset))
10479 output_asm_insn ("l\t%4,0(%1)", op);
10480 output_asm_insn ("ay\t%1,%3(%4)", op);
10482 else if (CONST_OK_FOR_K (vcall_offset))
10484 output_asm_insn ("lhi\t%4,%3", op);
10485 output_asm_insn ("a\t%4,0(%1)", op);
10486 output_asm_insn ("a\t%1,0(%4)", op);
10488 else if (CONST_OK_FOR_Os (vcall_offset))
10490 output_asm_insn ("iilf\t%4,%3", op);
10491 output_asm_insn ("a\t%4,0(%1)", op);
10492 output_asm_insn ("a\t%1,0(%4)", op);
10496 op[7] = gen_label_rtx ();
10497 output_asm_insn ("l\t%4,%7-%5(%4)", op);
10498 output_asm_insn ("a\t%4,0(%1)", op);
10499 output_asm_insn ("a\t%1,0(%4)", op);
10502 /* We had to clobber the base pointer register.
10503 Re-setup the base pointer (with a different base). */
10504 op[5] = gen_label_rtx ();
10505 output_asm_insn ("basr\t%4,0", op);
10506 targetm.asm_out.internal_label (file, "L",
10507 CODE_LABEL_NUMBER (op[5]));
10510 /* Jump to target. */
10511 op[8] = gen_label_rtx ();
10514 output_asm_insn ("l\t%4,%8-%5(%4)", op);
10515 else if (!nonlocal)
10516 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10517 /* We cannot call through .plt, since .plt requires %r12 loaded. */
10518 else if (flag_pic == 1)
10520 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10521 output_asm_insn ("l\t%4,%0(%4)", op);
10523 else if (flag_pic == 2)
10525 op[9] = gen_rtx_REG (Pmode, 0);
10526 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
10527 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10528 output_asm_insn ("ar\t%4,%9", op);
10529 output_asm_insn ("l\t%4,0(%4)", op);
10532 output_asm_insn ("br\t%4", op);
10534 /* Output literal pool. */
10535 output_asm_insn (".align\t4", op);
10537 if (nonlocal && flag_pic == 2)
10538 output_asm_insn (".long\t%0", op);
10541 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10542 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
10545 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
10547 output_asm_insn (".long\t%0", op);
10549 output_asm_insn (".long\t%0-%5", op);
10553 targetm.asm_out.internal_label (file, "L",
10554 CODE_LABEL_NUMBER (op[6]));
10555 output_asm_insn (".long\t%2", op);
10559 targetm.asm_out.internal_label (file, "L",
10560 CODE_LABEL_NUMBER (op[7]));
10561 output_asm_insn (".long\t%3", op);
10564 final_end_function ();
10568 s390_valid_pointer_mode (enum machine_mode mode)
10570 return (mode == SImode || (TARGET_64BIT && mode == DImode));
10573 /* Checks whether the given CALL_EXPR would use a caller
10574 saved register. This is used to decide whether sibling call
10575 optimization could be performed on the respective function
10579 s390_call_saved_register_used (tree call_expr)
10581 CUMULATIVE_ARGS cum_v;
10582 cumulative_args_t cum;
10584 enum machine_mode mode;
10589 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
10590 cum = pack_cumulative_args (&cum_v);
10592 for (i = 0; i < call_expr_nargs (call_expr); i++)
10594 parameter = CALL_EXPR_ARG (call_expr, i);
10595 gcc_assert (parameter);
10597 /* For an undeclared variable passed as parameter we will get
10598 an ERROR_MARK node here. */
10599 if (TREE_CODE (parameter) == ERROR_MARK)
10602 type = TREE_TYPE (parameter);
10605 mode = TYPE_MODE (type);
10608 if (pass_by_reference (&cum_v, mode, type, true))
10611 type = build_pointer_type (type);
10614 parm_rtx = s390_function_arg (cum, mode, type, 0);
10616 s390_function_arg_advance (cum, mode, type, 0);
10621 if (REG_P (parm_rtx))
10624 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
10626 if (!call_used_regs[reg + REGNO (parm_rtx)])
10630 if (GET_CODE (parm_rtx) == PARALLEL)
10634 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
10636 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
10638 gcc_assert (REG_P (r));
10641 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
10643 if (!call_used_regs[reg + REGNO (r)])
10652 /* Return true if the given call expression can be
10653 turned into a sibling call.
10654 DECL holds the declaration of the function to be called whereas
10655 EXP is the call expression itself. */
10658 s390_function_ok_for_sibcall (tree decl, tree exp)
10660 /* The TPF epilogue uses register 1. */
10661 if (TARGET_TPF_PROFILING)
10664 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
10665 which would have to be restored before the sibcall. */
10666 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
10669 /* Register 6 on s390 is available as an argument register but is
10670 call-saved. This makes functions needing this register for arguments
10671 not suitable for sibcalls. */
10672 return !s390_call_saved_register_used (exp);
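/* Illustrative sketch (assuming integer arguments go to r2..r6 as
   documented above): a call such as

     void callee (long, long, long, long, long);   -- 5th arg in r6

   places an argument in call-saved r6, so s390_call_saved_register_used
   returns true and the sibcall is rejected. */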
10675 /* Return the fixed registers used for condition codes. */
10678 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10681 *p2 = INVALID_REGNUM;
10686 /* This function is used by the call expanders of the machine description.
10687 It emits the call insn itself together with the necessary operations
10688 to adjust the target address and returns the emitted insn.
10689 ADDR_LOCATION is the target address rtx
10690 TLS_CALL the location of the thread-local symbol
10691 RESULT_REG the register where the result of the call should be stored
10692 RETADDR_REG the register where the return address should be stored
10693 If this parameter is NULL_RTX the call is considered
10694 to be a sibling call. */
10697 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
10700 bool plt_call = false;
10706 /* Direct function calls need special treatment. */
10707 if (GET_CODE (addr_location) == SYMBOL_REF)
10709 /* When calling a global routine in PIC mode, we must
10710 replace the symbol itself with the PLT stub. */
10711 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
10713 if (retaddr_reg != NULL_RTX)
10715 addr_location = gen_rtx_UNSPEC (Pmode,
10716 gen_rtvec (1, addr_location),
10718 addr_location = gen_rtx_CONST (Pmode, addr_location);
10722 /* For -fpic code the PLT entries might use r12 which is
10723 call-saved. Therefore we cannot do a sibcall when
10724 calling directly using a symbol ref. When reaching
10725 this point we decided (in s390_function_ok_for_sibcall)
10726 to do a sibcall for a function pointer but one of the
10727 optimizers was able to get rid of the function pointer
10728 by propagating the symbol ref into the call. This
10729 optimization is illegal for S/390 so we turn the direct
10730 call into an indirect call again. */
10731 addr_location = force_reg (Pmode, addr_location);
10734 /* Unless we can use the bras(l) insn, force the
10735 routine address into a register. */
10736 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
10739 addr_location = legitimize_pic_address (addr_location, 0);
10741 addr_location = force_reg (Pmode, addr_location);
10745 /* If it is already an indirect call or the code above moved the
10746 SYMBOL_REF to somewhere else, make sure the address can be found in
10747 register 1. */
10748 if (retaddr_reg == NULL_RTX
10749 && GET_CODE (addr_location) != SYMBOL_REF
10752 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
10753 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
10756 addr_location = gen_rtx_MEM (QImode, addr_location);
10757 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
10759 if (result_reg != NULL_RTX)
10760 call = gen_rtx_SET (VOIDmode, result_reg, call);
10762 if (retaddr_reg != NULL_RTX)
10764 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
10766 if (tls_call != NULL_RTX)
10767 vec = gen_rtvec (3, call, clobber,
10768 gen_rtx_USE (VOIDmode, tls_call));
10770 vec = gen_rtvec (2, call, clobber);
10772 call = gen_rtx_PARALLEL (VOIDmode, vec);
10775 insn = emit_call_insn (call);
10777 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
10778 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
10780 /* s390_function_ok_for_sibcall should
10781 have denied sibcalls in this case. */
10782 gcc_assert (retaddr_reg != NULL_RTX);
10783 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
10788 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
10791 s390_conditional_register_usage (void)
10797 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10798 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10800 if (TARGET_CPU_ZARCH)
10802 fixed_regs[BASE_REGNUM] = 0;
10803 call_used_regs[BASE_REGNUM] = 0;
10804 fixed_regs[RETURN_REGNUM] = 0;
10805 call_used_regs[RETURN_REGNUM] = 0;
10809 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10810 call_used_regs[i] = call_really_used_regs[i] = 0;
10814 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
10815 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
10818 if (TARGET_SOFT_FLOAT)
10820 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10821 call_used_regs[i] = fixed_regs[i] = 1;
10825 /* Corresponding function to eh_return expander. */
10827 static GTY(()) rtx s390_tpf_eh_return_symbol;
10829 s390_emit_tpf_eh_return (rtx target)
10834 if (!s390_tpf_eh_return_symbol)
10835 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10837 reg = gen_rtx_REG (Pmode, 2);
10838 orig_ra = gen_rtx_REG (Pmode, 3);
10840 emit_move_insn (reg, target);
10841 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
10842 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10843 gen_rtx_REG (Pmode, RETURN_REGNUM));
10844 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10845 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
10847 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10850 /* Rework the prologue/epilogue to avoid saving/restoring
10851 registers unnecessarily. */
10854 s390_optimize_prologue (void)
10856 rtx_insn *insn, *new_insn, *next_insn;
10858 /* Do a final recompute of the frame-related data. */
10859 s390_optimize_register_info ();
10861 /* If all special registers are in fact used, there's nothing we
10862 can do, so no point in walking the insn list. */
10864 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10865 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10866 && (TARGET_CPU_ZARCH
10867 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10868 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10871 /* Search for prologue/epilogue insns and replace them. */
10873 for (insn = get_insns (); insn; insn = next_insn)
10875 int first, last, off;
10876 rtx set, base, offset;
10879 next_insn = NEXT_INSN (insn);
10881 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10884 pat = PATTERN (insn);
10886 /* Remove ldgr/lgdr instructions used for saving and restoring
10887 GPRs if possible. */
10889 && GET_CODE (pat) == SET
10890 && GET_MODE (SET_SRC (pat)) == DImode
10891 && REG_P (SET_SRC (pat))
10892 && REG_P (SET_DEST (pat)))
10894 int src_regno = REGNO (SET_SRC (pat));
10895 int dest_regno = REGNO (SET_DEST (pat));
10899 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
10900 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
10903 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
10904 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
10906 /* GPR must be call-saved, FPR must be call-clobbered. */
10907 if (!call_really_used_regs[fpr_regno]
10908 || call_really_used_regs[gpr_regno])
10911 /* It must not happen that what we once saved in an FPR now
10912 needs a stack slot. */
10913 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
10915 if (cfun_gpr_save_slot (gpr_regno) == 0)
10917 remove_insn (insn);
10922 if (GET_CODE (pat) == PARALLEL
10923 && store_multiple_operation (pat, VOIDmode))
10925 set = XVECEXP (pat, 0, 0);
10926 first = REGNO (SET_SRC (set));
10927 last = first + XVECLEN (pat, 0) - 1;
10928 offset = const0_rtx;
10929 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10930 off = INTVAL (offset);
10932 if (GET_CODE (base) != REG || off < 0)
10934 if (cfun_frame_layout.first_save_gpr != -1
10935 && (cfun_frame_layout.first_save_gpr < first
10936 || cfun_frame_layout.last_save_gpr > last))
10938 if (REGNO (base) != STACK_POINTER_REGNUM
10939 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10941 if (first > BASE_REGNUM || last < BASE_REGNUM)
10944 if (cfun_frame_layout.first_save_gpr != -1)
10946 rtx s_pat = save_gprs (base,
10947 off + (cfun_frame_layout.first_save_gpr
10948 - first) * UNITS_PER_LONG,
10949 cfun_frame_layout.first_save_gpr,
10950 cfun_frame_layout.last_save_gpr);
10951 new_insn = emit_insn_before (s_pat, insn);
10952 INSN_ADDRESSES_NEW (new_insn, -1);
10955 remove_insn (insn);
10959 if (cfun_frame_layout.first_save_gpr == -1
10960 && GET_CODE (pat) == SET
10961 && GENERAL_REG_P (SET_SRC (pat))
10962 && GET_CODE (SET_DEST (pat)) == MEM)
10965 first = REGNO (SET_SRC (set));
10966 offset = const0_rtx;
10967 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10968 off = INTVAL (offset);
10970 if (GET_CODE (base) != REG || off < 0)
10972 if (REGNO (base) != STACK_POINTER_REGNUM
10973 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10976 remove_insn (insn);
10980 if (GET_CODE (pat) == PARALLEL
10981 && load_multiple_operation (pat, VOIDmode))
10983 set = XVECEXP (pat, 0, 0);
10984 first = REGNO (SET_DEST (set));
10985 last = first + XVECLEN (pat, 0) - 1;
10986 offset = const0_rtx;
10987 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10988 off = INTVAL (offset);
10990 if (GET_CODE (base) != REG || off < 0)
10993 if (cfun_frame_layout.first_restore_gpr != -1
10994 && (cfun_frame_layout.first_restore_gpr < first
10995 || cfun_frame_layout.last_restore_gpr > last))
10997 if (REGNO (base) != STACK_POINTER_REGNUM
10998 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11000 if (first > BASE_REGNUM || last < BASE_REGNUM)
11003 if (cfun_frame_layout.first_restore_gpr != -1)
11005 rtx rpat = restore_gprs (base,
11006 off + (cfun_frame_layout.first_restore_gpr
11007 - first) * UNITS_PER_LONG,
11008 cfun_frame_layout.first_restore_gpr,
11009 cfun_frame_layout.last_restore_gpr);
11011 /* Remove REG_CFA_RESTOREs for registers that we no
11012 longer need to save. */
11013 REG_NOTES (rpat) = REG_NOTES (insn);
11014 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
11015 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
11016 && ((int) REGNO (XEXP (*ptr, 0))
11017 < cfun_frame_layout.first_restore_gpr))
11018 *ptr = XEXP (*ptr, 1);
11020 ptr = &XEXP (*ptr, 1);
11021 new_insn = emit_insn_before (rpat, insn);
11022 RTX_FRAME_RELATED_P (new_insn) = 1;
11023 INSN_ADDRESSES_NEW (new_insn, -1);
11026 remove_insn (insn);
11030 if (cfun_frame_layout.first_restore_gpr == -1
11031 && GET_CODE (pat) == SET
11032 && GENERAL_REG_P (SET_DEST (pat))
11033 && GET_CODE (SET_SRC (pat)) == MEM)
11036 first = REGNO (SET_DEST (set));
11037 offset = const0_rtx;
11038 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
11039 off = INTVAL (offset);
11041 if (GET_CODE (base) != REG || off < 0)
11044 if (REGNO (base) != STACK_POINTER_REGNUM
11045 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11048 remove_insn (insn);
11054 /* On z10 and later the dynamic branch prediction must see the
11055 backward jump within a certain window. If not, it falls back to
11056 the static prediction. This function rearranges the loop backward
11057 branch in a way which makes the static prediction always correct.
11058 The function returns true if it added an instruction. */
11060 s390_fix_long_loop_prediction (rtx_insn *insn)
11062 rtx set = single_set (insn);
11063 rtx code_label, label_ref, new_label;
11064 rtx_insn *uncond_jump;
11065 rtx_insn *cur_insn;
11069 /* This will exclude branch on count and branch on index patterns
11070 since these are correctly statically predicted. */
11072 || SET_DEST (set) != pc_rtx
11073 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
11076 /* Skip conditional returns. */
11077 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
11078 && XEXP (SET_SRC (set), 2) == pc_rtx)
11081 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
11082 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
11084 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
11086 code_label = XEXP (label_ref, 0);
11088 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
11089 || INSN_ADDRESSES (INSN_UID (insn)) == -1
11090 || (INSN_ADDRESSES (INSN_UID (insn))
11091 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
11094 for (distance = 0, cur_insn = PREV_INSN (insn);
11095 distance < PREDICT_DISTANCE - 6;
11096 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
11097 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
11100 new_label = gen_label_rtx ();
11101 uncond_jump = emit_jump_insn_after (
11102 gen_rtx_SET (VOIDmode, pc_rtx,
11103 gen_rtx_LABEL_REF (VOIDmode, code_label)),
11105 emit_label_after (new_label, uncond_jump);
11107 tmp = XEXP (SET_SRC (set), 1);
11108 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
11109 XEXP (SET_SRC (set), 2) = tmp;
11110 INSN_CODE (insn) = -1;
11112 XEXP (label_ref, 0) = new_label;
11113 JUMP_LABEL (insn) = new_label;
11114 JUMP_LABEL (uncond_jump) = code_label;
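/* Transformation sketch (pseudo-assembly, illustration of the code
   above):

     code_label:                     code_label:
       ...                             ...
       jCOND code_label       =>       j!COND new_label  ; short forward
                                       j      code_label ; backward, uncond
                                     new_label:

   The backward branch becomes unconditional and is therefore always
   predicted correctly, regardless of the prediction window. */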
11119 /* Returns 1 if INSN reads the value of REG for purposes not related
11120 to addressing of memory, and 0 otherwise. */
11122 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
11124 return reg_referenced_p (reg, PATTERN (insn))
11125 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
11128 /* Starting from INSN find_cond_jump looks downwards in the insn
11129 stream for a single jump insn which is the last user of the
11130 condition code set in INSN. */
11132 find_cond_jump (rtx_insn *insn)
11134 for (; insn; insn = NEXT_INSN (insn))
11138 if (LABEL_P (insn))
11141 if (!JUMP_P (insn))
11143 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
11148 /* This will be triggered by a return. */
11149 if (GET_CODE (PATTERN (insn)) != SET)
11152 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
11153 ite = SET_SRC (PATTERN (insn));
11155 if (GET_CODE (ite) != IF_THEN_ELSE)
11158 cc = XEXP (XEXP (ite, 0), 0);
11159 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
11162 if (find_reg_note (insn, REG_DEAD, cc))
11170 /* Swap the condition in COND and the operands in OP0 and OP1 so that
11171 the semantics do not change. If NULL_RTX is passed as COND the
11172 function tries to find the conditional jump starting with INSN. */
11174 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
11178 if (cond == NULL_RTX)
11180 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
11181 rtx set = jump ? single_set (jump) : NULL_RTX;
11183 if (set == NULL_RTX)
11186 cond = XEXP (SET_SRC (set), 0);
11191 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
11194 /* On z10, instructions of the compare-and-branch family have the
11195 property to access the register occurring as second operand with
11196 its bits complemented. If such a compare is grouped with a second
11197 instruction that accesses the same register non-complemented, and
11198 if that register's value is delivered via a bypass, then the
11199 pipeline recycles, thereby causing significant performance decline.
11200 This function locates such situations and exchanges the two
11201 operands of the compare. The function returns true whenever it
11202 added an insn.
11204 s390_z10_optimize_cmp (rtx_insn *insn)
11206 rtx_insn *prev_insn, *next_insn;
11207 bool insn_added_p = false;
11208 rtx cond, *op0, *op1;
11210 if (GET_CODE (PATTERN (insn)) == PARALLEL)
11212 /* Handle compare and branch and branch on count
11214 rtx pattern = single_set (insn);
11217 || SET_DEST (pattern) != pc_rtx
11218 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
11221 cond = XEXP (SET_SRC (pattern), 0);
11222 op0 = &XEXP (cond, 0);
11223 op1 = &XEXP (cond, 1);
11225 else if (GET_CODE (PATTERN (insn)) == SET)
11229 /* Handle normal compare instructions. */
11230 src = SET_SRC (PATTERN (insn));
11231 dest = SET_DEST (PATTERN (insn));
11234 || !CC_REGNO_P (REGNO (dest))
11235 || GET_CODE (src) != COMPARE)
11238 /* s390_swap_cmp will try to find the conditional
11239 jump when passing NULL_RTX as condition. */
11241 op0 = &XEXP (src, 0);
11242 op1 = &XEXP (src, 1);
11247 if (!REG_P (*op0) || !REG_P (*op1))
11250 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
11253 /* Swap the COMPARE arguments and its mask if there is a
11254 conflicting access in the previous insn. */
11255 prev_insn = prev_active_insn (insn);
11256 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11257 && reg_referenced_p (*op1, PATTERN (prev_insn)))
11258 s390_swap_cmp (cond, op0, op1, insn);
11260 /* Check if there is a conflict with the next insn. If there
11261 was no conflict with the previous insn, then swap the
11262 COMPARE arguments and its mask. If we already swapped
11263 the operands, or if swapping them would cause a conflict
11264 with the previous insn, issue a NOP after the COMPARE in
11265 order to separate the two instructions. */
11266 next_insn = next_active_insn (insn);
11267 if (next_insn != NULL_RTX && INSN_P (next_insn)
11268 && s390_non_addr_reg_read_p (*op1, next_insn))
11270 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11271 && s390_non_addr_reg_read_p (*op0, prev_insn))
11273 if (REGNO (*op1) == 0)
11274 emit_insn_after (gen_nop1 (), insn);
11276 emit_insn_after (gen_nop (), insn);
11277 insn_added_p = true;
11280 s390_swap_cmp (cond, op0, op1, insn);
11282 return insn_added_p;
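/* Hedged sketch of the hazard handled above (pseudo-assembly; a z10
   compare-and-branch reads its second operand with complemented bits):

     lr    %r3,%r9           ; %r3 delivered via bypass
     crj   %r1,%r3,8,label   ; %r3 read complemented by the compare
     ar    %r4,%r3           ; non-complemented use right next to it

   Either the compare operands (and the condition) are swapped, or a
   nop is emitted after the compare to separate the two uses. */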
11285 /* Perform machine-dependent processing. */
11290 bool pool_overflow = false;
11292 /* Make sure all splits have been performed; splits after
11293 machine_dependent_reorg might confuse insn length counts. */
11294 split_all_insns_noflow ();
11296 /* Install the main literal pool and the associated base
11297 register load insns.
11299 In addition, there are two problematic situations we need
11300 to handle:
11302 - the literal pool might be > 4096 bytes in size, so that
11303 some of its elements cannot be directly accessed
11305 - a branch target might be > 64K away from the branch, so that
11306 it is not possible to use a PC-relative instruction.
11308 To fix those, we split the single literal pool into multiple
11309 pool chunks, reloading the pool base register at various
11310 points throughout the function to ensure it always points to
11311 the pool chunk the following code expects, and / or replace
11312 PC-relative branches by absolute branches.
11314 However, the two problems are interdependent: splitting the
11315 literal pool can move a branch further away from its target,
11316 causing the 64K limit to overflow, and on the other hand,
11317 replacing a PC-relative branch by an absolute branch means
11318 we need to put the branch target address into the literal
11319 pool, possibly causing it to overflow.
11321 So, we loop trying to fix up both problems until we manage
11322 to satisfy both conditions at the same time. Note that the
11323 loop is guaranteed to terminate as every pass of the loop
11324 strictly decreases the total number of PC-relative branches
11325 in the function. (This is not completely true as there
11326 might be branch-over-pool insns introduced by chunkify_start.
11327 Those never need to be split however.) */
11331 struct constant_pool *pool = NULL;
11333 /* Collect the literal pool. */
11334 if (!pool_overflow)
11336 pool = s390_mainpool_start ();
11338 pool_overflow = true;
11341 /* If literal pool overflowed, start to chunkify it. */
11343 pool = s390_chunkify_start ();
11345 /* Split out-of-range branches. If this has created new
11346 literal pool entries, cancel current chunk list and
11347 recompute it. zSeries machines have large branch
11348 instructions, so we never need to split a branch. */
11349 if (!TARGET_CPU_ZARCH && s390_split_branches ())
11352 s390_chunkify_cancel (pool);
11354 s390_mainpool_cancel (pool);
11359 /* If we made it up to here, both conditions are satisfied.
11360 Finish up literal pool related changes. */
11362 s390_chunkify_finish (pool);
11364 s390_mainpool_finish (pool);
11366 /* We're done splitting branches. */
11367 cfun->machine->split_branches_pending_p = false;
11371 /* Generate out-of-pool execute target insns. */
11372 if (TARGET_CPU_ZARCH)
11374 rtx_insn *insn, *target;
11377 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11379 label = s390_execute_label (insn);
11383 gcc_assert (label != const0_rtx);
11385 target = emit_label (XEXP (label, 0));
11386 INSN_ADDRESSES_NEW (target, -1);
11388 target = emit_insn (s390_execute_target (insn));
11389 INSN_ADDRESSES_NEW (target, -1);
11393 /* Try to optimize prologue and epilogue further. */
11394 s390_optimize_prologue ();
11396 /* Walk over the insns and do some >=z10 specific changes. */
11397 if (s390_tune == PROCESSOR_2097_Z10
11398 || s390_tune == PROCESSOR_2817_Z196
11399 || s390_tune == PROCESSOR_2827_ZEC12)
11402 bool insn_added_p = false;
11404 /* The insn lengths and addresses have to be up to date for the
11405 following manipulations. */
11406 shorten_branches (get_insns ());
11408 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11410 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
11414 insn_added_p |= s390_fix_long_loop_prediction (insn);
11416 if ((GET_CODE (PATTERN (insn)) == PARALLEL
11417 || GET_CODE (PATTERN (insn)) == SET)
11418 && s390_tune == PROCESSOR_2097_Z10)
11419 insn_added_p |= s390_z10_optimize_cmp (insn);
11422 /* Adjust branches if we added new instructions. */
11424 shorten_branches (get_insns ());
11428 /* Return true if INSN is a fp load insn writing register REGNO. */
11430 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
11433 enum attr_type flag = s390_safe_attr_type (insn);
11435 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
11438 set = single_set (insn);
11440 if (set == NULL_RTX)
11443 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
11446 if (REGNO (SET_DEST (set)) != regno)
11452 /* This value describes the distance to be avoided between an
11453 arithmetic fp instruction and an fp load writing the same register.
11454 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
11455 fine but the exact value has to be avoided. Otherwise the FP
11456 pipeline will throw an exception causing a major penalty. */
11457 #define Z10_EARLYLOAD_DISTANCE 7
11459 /* Rearrange the ready list in order to avoid the situation described
11460 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
11461 moved to the very end of the ready list. */
11463 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
11465 unsigned int regno;
11466 int nready = *nready_p;
11471 enum attr_type flag;
11474 /* Skip DISTANCE - 1 active insns. */
11475 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
11476 distance > 0 && insn != NULL_RTX;
11477 distance--, insn = prev_active_insn (insn))
11478 if (CALL_P (insn) || JUMP_P (insn))
11481 if (insn == NULL_RTX)
11484 set = single_set (insn);
11486 if (set == NULL_RTX || !REG_P (SET_DEST (set))
11487 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
11490 flag = s390_safe_attr_type (insn);
11492 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
11495 regno = REGNO (SET_DEST (set));
11498 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
11505 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
11510 /* The s390_sched_state variable tracks the state of the current or
11511 the last instruction group.
11513 0,1,2 number of instructions scheduled in the current group
11514 3 the last group is complete - normal insns
11515 4 the last group was a cracked/expanded insn */
11517 static int s390_sched_state;
11519 #define S390_OOO_SCHED_STATE_NORMAL 3
11520 #define S390_OOO_SCHED_STATE_CRACKED 4
11522 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
11523 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
11524 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
11525 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
static unsigned int
s390_get_sched_attrmask (rtx_insn *insn)
{
  unsigned int mask = 0;

  if (get_attr_ooo_cracked (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
  if (get_attr_ooo_expanded (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
  if (get_attr_ooo_endgroup (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
  if (get_attr_ooo_groupalone (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;

  return mask;
}

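/* Note (assumption, not from the original sources): the ooo_cracked,
   ooo_expanded, ooo_endgroup and ooo_groupalone insn attributes
   queried above are defined in the machine description, and the
   get_attr_* accessors are generated from it by genattrtab.  */
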
/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state s390_sched_state.  */
static int
s390_sched_score (rtx_insn *insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (s390_sched_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
         break a group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
    case 1:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
        score += 5;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_NORMAL:
      /* Prefer not cracked insns if the last was not cracked.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_CRACKED:
      /* Try to keep cracked insns together to prevent them from
         interrupting groups.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      break;
    }
  return score;
}

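/* Worked example (illustrative, using the scores above): with
   s390_sched_state == 2, i.e. two insns already in the group, an
   uncracked endgroup insn scores 10 + 10 = 20 and is preferred for
   the last slot, while a cracked insn without the endgroup attribute
   scores 0 and is left for a later group.  */
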
/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
static int
s390_sched_reorder (FILE *file, int verbose,
                    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10)
    if (reload_completed && *nreadyp > 1)
      s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
         end) of the list.  A full sort is not needed since a conflict
         in the hazard recognition cannot happen.  So the top insn in
         the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
        {
          int score;

          if (recog_memoized (ready[i]) < 0)
            continue;

          score = s390_sched_score (ready[i]);
          if (score > max_score)
            {
              max_score = score;
              max_index = i;
            }
        }

      if (max_index != -1)
        {
          if (max_index != last_index)
            {
              tmp = ready[max_index];
              ready[max_index] = ready[last_index];
              ready[last_index] = tmp;

              if (verbose > 5)
                fprintf (file,
                         "move insn %d to the top of list\n",
                         INSN_UID (ready[last_index]));
            }
          else if (verbose > 5)
            fprintf (file,
                     "best insn %d already on top\n",
                     INSN_UID (ready[last_index]));
        }

      if (verbose > 5)
        {
          fprintf (file, "ready list ooo attributes - sched state: %d\n",
                   s390_sched_state);

          for (i = last_index; i >= 0; i--)
            {
              if (recog_memoized (ready[i]) < 0)
                continue;
              fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
                       s390_sched_score (ready[i]));
#define PRINT_OOO_ATTR(ATTR) \
              fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
              PRINT_OOO_ATTR (ooo_cracked);
              PRINT_OOO_ATTR (ooo_expanded);
              PRINT_OOO_ATTR (ooo_endgroup);
              PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
              fprintf (file, "\n");
            }
        }
    }

  return s390_issue_rate ();
}

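/* Note (assumption, not from the original sources): the dumps guarded
   by verbose > 5 above end up in the scheduler dump, e.g. when
   compiling with -mtune=zEC12 -fsched-verbose=6 -fdump-rtl-sched2.  */
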
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
      else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
               || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
      else
        {
          /* Only normal insns are left (mask == 0).  */
          switch (s390_sched_state)
            {
            case 0:
            case 1:
            case 2:
            case S390_OOO_SCHED_STATE_NORMAL:
              if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
                s390_sched_state = 1;
              else
                s390_sched_state++;
              break;
            case S390_OOO_SCHED_STATE_CRACKED:
              s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
              break;
            }
        }

      if (verbose > 5)
        {
          fprintf (file, "insn %d: ", INSN_UID (insn));
#define PRINT_OOO_ATTR(ATTR) \
          fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
          PRINT_OOO_ATTR (ooo_cracked);
          PRINT_OOO_ATTR (ooo_expanded);
          PRINT_OOO_ATTR (ooo_endgroup);
          PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
          fprintf (file, "\n");
          fprintf (file, "sched state: %d\n", s390_sched_state);
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  s390_sched_state = 0;
}

/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
   calculates the number of times LOOP should be unrolled when tuning
   for cpus with a built-in stride prefetcher.
   The loop body is analyzed for memory accesses by walking all rtxs
   of the loop.  Depending on the loop_depth and the number of memory
   accesses a number <= nunroll is returned to improve the behaviour
   of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune != PROCESSOR_2097_Z10
      && s390_tune != PROCESSOR_2817_Z196
      && s390_tune != PROCESSOR_2827_ZEC12)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
        FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
          if (MEM_P (*iter))
            mem_count += 1;
  free (bbs);

  /* Prevent division by zero; nunroll need not be adjusted in this
     case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}

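/* Worked example (illustrative only): a depth-1 loop containing four
   memory references unrolled with nunroll == 8 yields
   MIN (8, 28 / 4) = 7, so at most seven copies of the loop body are
   emitted.  */
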
static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mhotpatch:
            s390_hotpatch_trampoline_halfwords = (opt->value) ?
              s390_hotpatch_trampoline_halfwords_default : -1;
            break;
          case OPT_mhotpatch_:
            {
              int val;

              val = integral_argument (opt->arg);
              if (val == -1)
                {
                  /* The argument is not a plain number.  */
                  error ("argument to %qs should be a non-negative integer",
                         "-mhotpatch=");
                  break;
                }
              else if (val > s390_hotpatch_trampoline_halfwords_max)
                {
                  error ("argument to %qs is too large (max. %d)",
                         "-mhotpatch=", s390_hotpatch_trampoline_halfwords_max);
                  break;
                }
              s390_hotpatch_trampoline_halfwords = val;
              break;
            }
          default:
            gcc_unreachable ();
          }
      }

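  /* Usage example (illustrative only): -mhotpatch=12 reserves twelve
     halfwords for hotpatching, -mhotpatch=foo is rejected as not
     being a non-negative integer, and a value above
     s390_hotpatch_trampoline_halfwords_max draws the "too large"
     diagnostic.  */
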
  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  /* Architecture mode defaults according to ABI.  */
  if (!(target_flags_explicit & MASK_ZARCH))
    {
      if (TARGET_64BIT)
        target_flags |= MASK_ZARCH;
      else
        target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on the
     cmdline.  */
  if (s390_arch == PROCESSOR_max)
    {
      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
      s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
      s390_arch_flags = processor_flags_table[(int) s390_arch];
    }

  /* Determine processor to tune for.  */
  if (s390_tune == PROCESSOR_max)
    {
      s390_tune = s390_arch;
      s390_tune_flags = s390_arch_flags;
    }

  /* Sanity checks.  */
  if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
    error ("z/Architecture mode not supported on %s", s390_arch_string);
  if (TARGET_64BIT && !TARGET_ZARCH)
    error ("64-bit ABI not supported in ESA/390 mode");

  /* Use hardware DFP if available and not explicitly disabled by
     user.  E.g. with -m31 -march=z10 -mzarch  */
  if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
    target_flags |= MASK_HARD_DFP;

  /* Enable hardware transactions if available and not explicitly
     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch  */
  if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
    target_flags |= MASK_OPT_HTM;

  if (TARGET_HARD_DFP && !TARGET_DFP)
    {
      if (target_flags_explicit & MASK_HARD_DFP)
        {
          if (!TARGET_CPU_DFP)
            error ("hardware decimal floating point instructions"
                   " not available on %s", s390_arch_string);
          if (!TARGET_ZARCH)
            error ("hardware decimal floating point instructions"
                   " not available in ESA/390 mode");
        }
      else
        target_flags &= ~MASK_HARD_DFP;
    }

  if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
    {
      if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
        error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");

      target_flags &= ~MASK_HARD_DFP;
    }

  /* Set processor cost function.  */
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
      s390_cost = &z990_cost;
      break;
    case PROCESSOR_2094_Z9_109:
      s390_cost = &z9_109_cost;
      break;
    case PROCESSOR_2097_Z10:
      s390_cost = &z10_cost;
      break;
    case PROCESSOR_2817_Z196:
      s390_cost = &z196_cost;
      break;
    case PROCESSOR_2827_ZEC12:
      s390_cost = &zEC12_cost;
      break;
    default:
      s390_cost = &z900_cost;
      break;
    }

  if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
           "in combination");

  if (s390_stack_size)
    {
      if (s390_stack_guard >= s390_stack_size)
        error ("stack size must be greater than the stack guard value");
      else if (s390_stack_size > 1 << 16)
        error ("stack size must not be greater than 64k");
    }
  else if (s390_stack_guard)
    error ("-mstack-guard implies use of -mstack-size");

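  /* Usage example (illustrative only): -mstack-size=65536
     -mstack-guard=4096 passes the checks above, -mstack-size=4096
     -mstack-guard=8192 triggers the first error, and -mstack-guard
     without -mstack-size the last one.  */
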
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (s390_tune == PROCESSOR_2097_Z10
      || s390_tune == PROCESSOR_2817_Z196
      || s390_tune == PROCESSOR_2827_ZEC12)
    {
      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
    }

  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Values for loop prefetching.  */
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized assume that we only get a small
     part of the caches above l1.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

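  /* Note (illustrative, not from the original sources): the values
     above are defaults only; an explicit command line setting such as
     --param l1-cache-size=256 still wins, since maybe_set_param_value
     does not override params the user has set explicitly.  */
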
  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
         debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
        dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
        dwarf_version = 2;
    }

  /* Register a target-specific optimization-and-lowering pass
     to run immediately before prologue and epilogue generation.

     Registering the pass must be done at start up.  It's
     convenient to do it here.  */
  opt_pass *new_pass = new pass_s390_early_mach (g);
  struct register_pass_info insert_pass_s390_early_mach =
    {
      new_pass,                 /* pass */
      "pro_and_epilogue",       /* reference_pass_name */
      1,                        /* ref_pass_instance_number */
      PASS_POS_INSERT_BEFORE    /* po_op */
    };
  register_pass (&insert_pass_s390_early_mach);
}

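/* Note (assumption, not from the original sources): with this
   registration the new pass runs once directly before the first
   instance of the "pro_and_epilogue" pass; PASS_POS_INSERT_BEFORE
   together with ref_pass_instance_number == 1 selects that position,
   and the placement can be inspected with -fdump-passes.  */
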
/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef ASM_OUTPUT_DWARF_DTPREL
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"