/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of multiplication in TFmode.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of a DXBR instruction.  */
  const int ddbr;     /* cost of a DDBR instruction.  */
  const int debr;     /* cost of a DEBR instruction.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR  */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR  */
  COSTS_N_INSNS (30),    /* DDBR  */
  COSTS_N_INSNS (27),    /* DEBR  */
  COSTS_N_INSNS (220),   /* DLGR  */
  COSTS_N_INSNS (34),    /* DLR   */
  COSTS_N_INSNS (34),    /* DR    */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR  */
};
static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR  */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR  */
  COSTS_N_INSNS (40),    /* DDBR  */
  COSTS_N_INSNS (26),    /* DEBR  */
  COSTS_N_INSNS (176),   /* DLGR  */
  COSTS_N_INSNS (31),    /* DLR   */
  COSTS_N_INSNS (31),    /* DR    */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR  */
};
static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR  */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR  */
  COSTS_N_INSNS (40),    /* DDBR  */
  COSTS_N_INSNS (26),    /* DEBR  */
  COSTS_N_INSNS (30),    /* DLGR  */
  COSTS_N_INSNS (23),    /* DLR   */
  COSTS_N_INSNS (23),    /* DR    */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR  */
};
static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR  */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR  */
  COSTS_N_INSNS (39),    /* DDBR  */
  COSTS_N_INSNS (32),    /* DEBR  */
  COSTS_N_INSNS (160),   /* DLGR  */
  COSTS_N_INSNS (71),    /* DLR   */
  COSTS_N_INSNS (71),    /* DR    */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR  */
};
static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static struct
{
  const char *const name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "native", PROCESSOR_NATIVE,      NULL }
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100
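/* Illustration (not from the original sources): with the values
   above, an insn whose execution unit was last used
   MAX_SCHED_MIX_DISTANCE or more insns ago receives the full mix
   score of 8, while shorter idle distances are assumed to scale the
   score down proportionally in the scheduling hooks (not shown
   here).  */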
/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* First and last gpr for which slots in the register
     save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
       0 - does not need to be saved at all
      -1 - stack slot  */
#define SAVE_SLOT_NONE   0
#define SAVE_SLOT_STACK  -1
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};
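/* A worked example of the fpr_bitmap encoding above: a function that
   has to save f0 (hard reg 16) and f8 (hard reg 24) gets bits 0 and 8
   set, i.e. fpr_bitmap == 0x0101; f8 also counts towards high_fprs
   since it is one of f8-f15.  */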
/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     the FPRs.  */
  bool tbegin_p;

  /* For -fsplit-stack support: A stack local which holds a pointer to
     the stack arguments for a function with a variable number of
     arguments.  This is set at the start of the function and is used
     to initialize the overflow_arg_area field of the va_list
     structure.  */
  rtx split_stack_varargs_pointer;
};
/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
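/* Example of aspect 2 above: a 16-byte vector function argument is
   passed in a vector register (or by value on the stack) under the
   new ABI, but by reference on the stack under the old one, so seeing
   such an argument sets the marker to 2 or 1 respectively.  */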
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, it might still be
	 ABI relevant due to the changed alignment if it is a struct
	 member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)			\
  if (s390_builtin_types[INDEX] == NULL)		\
    s390_builtin_types[INDEX] = (!CONST_P) ?		\
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)			\
  if (s390_builtin_fn_types[INDEX] == NULL)		\
    s390_builtin_fn_types[INDEX] =			\
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_BUILTIN_##NAME,			\
			    BUILT_IN_MD,				\
			    NULL,					\
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)								\
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,				\
			    NULL,					\
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
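/* For illustration only (the concrete entry is hypothetical): a line
   in s390-builtins.def such as
     B_DEF (s390_tend, tend, 0, B_HTM, 0, BT_FN_INT)
   expands through the B_DEF macro above into an add_builtin_function
   call registering __builtin_s390_tend with function type
   s390_builtin_fn_types[BT_FN_INT].  */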
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to
   be passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error ("constant argument %d for builtin %qF is out of range (0.."
		 HOST_WIDE_INT_PRINT_UNSIGNED ")",
		 argnum, decl,
		 (HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range ("
		 HOST_WIDE_INT_PRINT_DEC ".."
		 HOST_WIDE_INT_PRINT_DEC ")",
		 argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without -mhtm "
		 "(default with -march=zEC12 and higher).", fndecl);
	  return const0_rtx;
	}
      if ((bflags & B_VX) && !TARGET_VX)
	{
	  error ("builtin %qF is not supported without -mvx "
		 "(default with -march=z13 and higher).", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
					       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which requires vector unsigned char
	 arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}
      else if (GET_MODE (op[arity]) == insn_op->mode
	       || GET_MODE (op[arity]) == VOIDmode
	       || (insn_op->predicate == address_operand
		   && GET_MODE (op[arity]) == Pmode))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and,
   if it has an argument, whether the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (expr, s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (expr2, s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case DImode: case V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case SImode: case V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case HImode: case V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case QImode: case V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
    case SFmode:
    case DFmode:
    case TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;

      return VOIDmode;

    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
    case CCVIHmode:
    case CCVIHUmode:
    case CCVFHmode:
    case CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode)
	return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
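/* Worked example for the mixed case above: for
   (compare (and x 144) 16), i.e. op1 == 144 and op2 == 16, we get
   bit1 == exact_log2 (16) == 4 and bit0 == exact_log2 (144 ^ 16)
   == 7; since bit0 > bit1 the result is CCT1mode, matching the
   "(a & (16 + 128)) == 16 -> CCT1" example in the comment.  */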
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
      case EQ:
      case NE:
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	  return CCAPmode;
	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	     || GET_CODE (op1) == NEG)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCLmode;

	if (GET_CODE (op0) == AND)
	  {
	    /* Check whether we can potentially do it via TM.  */
	    machine_mode ccmode;
	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	    if (ccmode != VOIDmode)
	      {
		/* Relax CCTmode to CCZmode to allow fall-back to AND
		   if that turns out to be beneficial.  */
		return ccmode == CCTmode ? CCZmode : ccmode;
	      }
	  }

	if (register_operand (op0, HImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	  return CCT3mode;
	if (register_operand (op0, QImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	  return CCT3mode;

	return CCZmode;

      case LE:
      case LT:
      case GE:
      case GT:
	/* The only overflow condition of NEG and ABS happens when
	   -INT_MAX is used as parameter, which stays negative. So
	   we have an overflow from a positive value to a negative.
	   Using CCAP mode the resulting cc can be used for comparisons.  */
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;

	/* If constants are involved in an add instruction it is possible to use
	   the resulting cc for comparisons with zero. Knowing the sign of the
	   constant the overflow behavior gets predictable. e.g.:
	     int a, b; if ((b = a + c) > 0)
	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		    /* Avoid INT32_MIN on 32 bit.  */
		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	  {
	    if (INTVAL (XEXP (op0, 1)) < 0)
	      return CCANmode;
	    else
	      return CCAPmode;
	  }
	/* Fall through.  */
      case UNORDERED:
      case ORDERED:
      case UNEQ:
      case UNLE:
      case UNLT:
      case UNGE:
      case UNGT:
      case LTGT:
	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCSRmode;
	return CCSmode;

      case LTU:
      case GEU:
	if (GET_CODE (op0) == PLUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL1mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      case LEU:
      case GTU:
	if (GET_CODE (op0) == MINUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL2mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      default:
	gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case CCZmode:
	case CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ;  break;
	    case NE: new_code = NE;  break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;

      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
						   XEXP (*op0, 0),
						   XEXP (*op1, 0), NULL);
      else
	new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
	{
	  *code = (int) new_code;
	  *op1 = XEXP (*op0, 1);
	  *op0 = XEXP (*op0, 0);
	}
    }
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
	 compare_and_swap pattern already computed the result and the
	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
			    const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCT1mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC1;
	case NE:	return CC0 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCT2mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC2;
	case NE:	return CC0 | CC1 | CC3;
	default:	return -1;
	}
      break;

    case CCT3mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC3;
	case NE:	return CC0 | CC1 | CC2;
	default:	return -1;
	}
      break;

    case CCLmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0 | CC2;
	case NE:	return CC1 | CC3;
	default:	return -1;
	}
      break;

    case CCL1mode:
      switch (GET_CODE (code))
	{
	case LTU:	return CC2 | CC3;  /* carry */
	case GEU:	return CC0 | CC1;  /* no carry */
	default:	return -1;
	}
      break;

    case CCL2mode:
      switch (GET_CODE (code))
	{
	case GTU:	return CC0 | CC1;  /* borrow */
	case LEU:	return CC2 | CC3;  /* no borrow */
	default:	return -1;
	}
      break;

    case CCL3mode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0 | CC2;
	case NE:	return CC1 | CC3;
	case LTU:	return CC1;
	case GTU:	return CC3;
	case LEU:	return CC1 | CC2;
	case GEU:	return CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCUmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LTU:	return CC1;
	case GTU:	return CC2;
	case LEU:	return CC0 | CC1;
	case GEU:	return CC0 | CC2;
	default:	return -1;
	}
      break;

    case CCURmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC2 | CC1 | CC3;
	case LTU:	return CC2;
	case GTU:	return CC1;
	case LEU:	return CC0 | CC2;
	case GEU:	return CC0 | CC1;
	default:	return -1;
	}
      break;

    case CCAPmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1 | CC3;
	case GT:	return CC2;
	case LE:	return CC0 | CC1 | CC3;
	case GE:	return CC0 | CC2;
	default:	return -1;
	}
      break;

    case CCANmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1;
	case GT:	return CC2 | CC3;
	case LE:	return CC0 | CC1;
	case GE:	return CC0 | CC2 | CC3;
	default:	return -1;
	}
      break;

    case CCSmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC1 | CC2 | CC3;
	case LT:	return CC1;
	case GT:	return CC2;
	case LE:	return CC0 | CC1;
	case GE:	return CC0 | CC2;
	case UNORDERED:	return CC3;
	case ORDERED:	return CC0 | CC1 | CC2;
	case UNEQ:	return CC0 | CC3;
	case UNLT:	return CC1 | CC3;
	case UNGT:	return CC2 | CC3;
	case UNLE:	return CC0 | CC1 | CC3;
	case UNGE:	return CC0 | CC2 | CC3;
	case LTGT:	return CC1 | CC2;
	default:	return -1;
	}
      break;

    case CCSRmode:
      switch (GET_CODE (code))
	{
	case EQ:	return CC0;
	case NE:	return CC2 | CC1 | CC3;
	case LT:	return CC2;
	case GT:	return CC1;
	case LE:	return CC0 | CC2;
	case GE:	return CC0 | CC1;
	case UNORDERED:	return CC3;
	case ORDERED:	return CC0 | CC2 | CC1;
	case UNEQ:	return CC0 | CC3;
	case UNLT:	return CC2 | CC3;
	case UNGT:	return CC1 | CC3;
	case UNLE:	return CC0 | CC2 | CC3;
	case UNGE:	return CC0 | CC1 | CC3;
	case LTGT:	return CC2 | CC1;
	default:	return -1;
	}
      break;

    /* Vector comparison modes.  */
    /* CC2 will never be set.  It however is part of the negated
       masks.  */
    case CCVIALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE:	return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT:	return CC3 | CC1 | CC2;
	default:	return -1;
	}

    case CCVIANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE:	return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT:	return CC3 | CC2;
	default:	return -1;
	}

    case CCVFALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE:	return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case UNLE:
	case UNLT:	return CC3 | CC1 | CC2;
	default:	return -1;
	}

    case CCVFANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE:	return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case UNLE:
	case UNLT:	return CC3 | CC2;
	default:	return -1;
	}

    case CCRAWmode:
      switch (GET_CODE (code))
	{
	case EQ:
	  return INTVAL (XEXP (code, 1));
	case NE:
	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
	default:
	  return -1;
	}

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
	      && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
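/* Worked example tying the two functions together: a LEU comparison
   in CCUmode yields the mask CC0 | CC1 == 8 | 4 == 12 in
   s390_branch_condition_mask, and mnemonic[12] above renders this
   branch condition as "le".  */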
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  machine_mode mode,
		  machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
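/* Example: s390_single_part (GEN_INT (0x0000ffff00000000), DImode,
   HImode, 0) returns 1: numbering the four HImode parts from the
   most significant one, part 1 is the only part that differs from
   DEF == 0.  */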
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
	/* Look for the rightmost bit of a contiguous range of ones.  */
	if (bitmask & in)
	  /* Found it.  */
	  end = start;
      }
    else
      {
	/* Look for the first zero bit after the range of ones.  */
	if (! (bitmask & in))
	  /* Found it.  */
	  break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
      if (mask & in)
	/* There are more bits set beyond the first range of one bits.  */
	return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
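/* Example: for IN == 0x0ff0 and SIZE == 16 the function returns true
   with *PSTART == 52 and *PEND == 59; counting from the highest order
   bit of the 64-bit value, the contiguous ones occupy bit positions
   52 to 59.  */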
/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
			   int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a contiguous bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (! wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/STOP give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b)
    {
      if (start)
	{
	  *start -= (HOST_BITS_PER_WIDE_INT - size);
	  *end -= (HOST_BITS_PER_WIDE_INT - size);
	}
      return true;
    }
  else
    return false;
}
/* Return true if OP consists only of byte chunks being either 0 or
   0xff.  If MASK is !=NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
	return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
	{
	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
	    return false;
	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
	  c = c >> BITS_PER_UNIT;
	}
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}
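/* Example: a V4SImode constant vector with all elements equal to
   0x00ff00ff consists only of 0x00 and 0xff bytes, so the function
   returns true and, per the shift logic above, produces the byte
   mask 0x5555.  */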
/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;
  bool ok;

  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl >= 0)
    return (64 - end >= rotl);
  else
    {
      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
	 DImode.  */
      rotl = -rotl + (64 - bitsize);
      return (start >= rotl);
    }
}
/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
  /* Floating point and vector registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
	return false;
    }

  return true;
}
2414 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2415 and [MEM2, MEM2 + SIZE] do overlap and false otherwise. */
2419 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2421 rtx addr1, addr2, addr_delta;
2422 HOST_WIDE_INT delta;
2424 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2430 addr1 = XEXP (mem1, 0);
2431 addr2 = XEXP (mem2, 0);
2433 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2435 /* This overlapping check is used by peepholes merging memory block operations.
2436 Overlapping operations would otherwise be recognized by the S/390 hardware
2437 and would fall back to a slower implementation. Allowing overlapping
2438 operations would lead to slow code but not to wrong code. Therefore we are
2439 somewhat optimistic if we cannot prove that the memory blocks are disjoint.
2441 That's why we return false here although this may accept operations on
2442 overlapping memory areas. */
2443 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2446 delta = INTVAL (addr_delta);
2449 || (delta > 0 && delta < size)
2450 || (delta < 0 && -delta < size))
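/* Worked example (illustrative): if ADDR2 - ADDR1 simplifies to the
   constant 16 and SIZE is 8, neither "0 < 16 < 8" nor its negative
   counterpart holds, so the blocks are provably disjoint and the
   peephole may merge the two block operations.  A standalone
   restatement of the predicate (hypothetical helper):  */
#if 0
static bool
example_blocks_overlap (HOST_WIDE_INT delta, HOST_WIDE_INT size)
{
  return delta == 0
	 || (delta > 0 && delta < size)
	 || (delta < 0 && -delta < size);
}
#endif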
2456 /* Check whether the address of memory reference MEM2 equals exactly
2457 the address of memory reference MEM1 plus DELTA. Return true if
2458 we can prove this to be the case, false otherwise. */
2461 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2463 rtx addr1, addr2, addr_delta;
2465 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2468 addr1 = XEXP (mem1, 0);
2469 addr2 = XEXP (mem2, 0);
2471 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2472 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2478 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2481 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2484 machine_mode wmode = mode;
2485 rtx dst = operands[0];
2486 rtx src1 = operands[1];
2487 rtx src2 = operands[2];
2490 /* If we cannot handle the operation directly, use a temp register. */
2491 if (!s390_logical_operator_ok_p (operands))
2492 dst = gen_reg_rtx (mode);
2494 /* QImode and HImode patterns make sense only if we have a destination
2495 in memory. Otherwise perform the operation in SImode. */
2496 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2499 /* Widen operands if required. */
2502 if (GET_CODE (dst) == SUBREG
2503 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2505 else if (REG_P (dst))
2506 dst = gen_rtx_SUBREG (wmode, dst, 0);
2508 dst = gen_reg_rtx (wmode);
2510 if (GET_CODE (src1) == SUBREG
2511 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2513 else if (GET_MODE (src1) != VOIDmode)
2514 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2516 if (GET_CODE (src2) == SUBREG
2517 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2519 else if (GET_MODE (src2) != VOIDmode)
2520 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2523 /* Emit the instruction. */
2524 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2525 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2526 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2528 /* Fix up the destination if needed. */
2529 if (dst != operands[0])
2530 emit_move_insn (operands[0], gen_lowpart (mode, dst));
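/* Illustrative sketch (hypothetical operands): expanding a QImode AND
   whose destination is a register.  Since the QImode/HImode patterns
   only exist for memory destinations, the expander above performs the
   operation in SImode on subregs and copies the low part back:  */
#if 0
static void
example_expand_qi_and (void)
{
  rtx ops[3];
  ops[0] = gen_reg_rtx (QImode);
  ops[1] = gen_reg_rtx (QImode);
  ops[2] = gen_reg_rtx (QImode);
  /* Emits (parallel [(set (subreg:SI ...) (and:SI ...))
		      (clobber (reg:CC CC_REGNUM))])
     followed by a move of the low part into ops[0].  */
  s390_expand_logical_operator (AND, QImode, ops);
}
#endif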
2533 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2536 s390_logical_operator_ok_p (rtx *operands)
2538 /* If the destination operand is in memory, it needs to coincide
2539 with one of the source operands. After reload, it has to be
2540 the first source operand. */
2541 if (GET_CODE (operands[0]) == MEM)
2542 return rtx_equal_p (operands[0], operands[1])
2543 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2548 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2549 operand IMMOP to switch from SS to SI type instructions. */
2552 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2554 int def = code == AND ? -1 : 0;
2558 gcc_assert (GET_CODE (*memop) == MEM);
2559 gcc_assert (!MEM_VOLATILE_P (*memop));
2561 mask = s390_extract_part (*immop, QImode, def);
2562 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2563 gcc_assert (part >= 0);
2565 *memop = adjust_address (*memop, QImode, part);
2566 *immop = gen_int_mode (mask, QImode);
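/* Worked example (illustrative): an SImode IOR of a non-volatile MEM
   with the immediate 0x00ff0000 has exactly one QImode part that
   differs from the IOR default of 0, namely byte 1 counting from the
   most significant byte.  The helper above therefore rewrites
   *MEMOP from (mem:SI X) to (mem:QI X+1) and *IMMOP from 0x00ff0000
   to 0xff, so a single-byte OI can replace the SS-type sequence.  */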
2570 /* How to allocate a 'struct machine_function'. */
2572 static struct machine_function *
2573 s390_init_machine_status (void)
2575 return ggc_cleared_alloc<machine_function> ();
2578 /* Map for smallest class containing reg regno. */
2580 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2581 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2582 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2583 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2584 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2585 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2586 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2587 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2588 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2589 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2590 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2591 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2592 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2593 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2594 VEC_REGS, VEC_REGS /* 52 */
2597 /* Return attribute type of insn. */
2599 static enum attr_type
2600 s390_safe_attr_type (rtx_insn *insn)
2602 if (recog_memoized (insn) >= 0)
2603 return get_attr_type (insn);
2608 /* Return true if DISP is a valid short displacement. */
2611 s390_short_displacement (rtx disp)
2613 /* No displacement is OK. */
2617 /* Without the long displacement facility we don't need to
2618 distinguish between long and short displacement. */
2619 if (!TARGET_LONG_DISPLACEMENT)
2622 /* Integer displacement in range. */
2623 if (GET_CODE (disp) == CONST_INT)
2624 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2626 /* GOT offset is not OK, the GOT can be large. */
2627 if (GET_CODE (disp) == CONST
2628 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2629 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2630 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2633 /* All other symbolic constants are literal pool references,
2634 which are OK as the literal pool must be small. */
2635 if (GET_CODE (disp) == CONST)
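/* Illustrative sketch (hypothetical helper; assumes the
   long-displacement facility is enabled): only the unsigned 12-bit
   range counts as a short displacement.  */
#if 0
static void
example_short_displacement (void)
{
  s390_short_displacement (GEN_INT (4000)); /* true:  0 <= 4000 < 4096 */
  s390_short_displacement (GEN_INT (5000)); /* false: needs the 20-bit form */
  s390_short_displacement (GEN_INT (-8));   /* false: negative */
}
#endif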
2641 /* Decompose a RTL expression ADDR for a memory address into
2642 its components, returned in OUT.
2644 Returns false if ADDR is not a valid memory address, true
2645 otherwise. If OUT is NULL, don't return the components,
2646 but check for validity only.
2648 Note: Only addresses in canonical form are recognized.
2649 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2650 canonical form so that they will be recognized. */
2653 s390_decompose_address (rtx addr, struct s390_address *out)
2655 HOST_WIDE_INT offset = 0;
2656 rtx base = NULL_RTX;
2657 rtx indx = NULL_RTX;
2658 rtx disp = NULL_RTX;
2660 bool pointer = false;
2661 bool base_ptr = false;
2662 bool indx_ptr = false;
2663 bool literal_pool = false;
2665 /* We may need to substitute the literal pool base register into the address
2666 below. However, at this point we do not know which register is going to
2667 be used as base, so we substitute the arg pointer register. This is going
2668 to be treated as holding a pointer below -- it shouldn't be used for any other purpose. */
2670 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2672 /* Decompose address into base + index + displacement. */
2674 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2677 else if (GET_CODE (addr) == PLUS)
2679 rtx op0 = XEXP (addr, 0);
2680 rtx op1 = XEXP (addr, 1);
2681 enum rtx_code code0 = GET_CODE (op0);
2682 enum rtx_code code1 = GET_CODE (op1);
2684 if (code0 == REG || code0 == UNSPEC)
2686 if (code1 == REG || code1 == UNSPEC)
2688 indx = op0; /* index + base */
2694 base = op0; /* base + displacement */
2699 else if (code0 == PLUS)
2701 indx = XEXP (op0, 0); /* index + base + disp */
2702 base = XEXP (op0, 1);
2713 disp = addr; /* displacement */
2715 /* Extract integer part of displacement. */
2719 if (GET_CODE (disp) == CONST_INT)
2721 offset = INTVAL (disp);
2724 else if (GET_CODE (disp) == CONST
2725 && GET_CODE (XEXP (disp, 0)) == PLUS
2726 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2728 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2729 disp = XEXP (XEXP (disp, 0), 0);
2733 /* Strip off CONST here to avoid special case tests later. */
2734 if (disp && GET_CODE (disp) == CONST)
2735 disp = XEXP (disp, 0);
2737 /* We can convert literal pool addresses to
2738 displacements by basing them off the base register. */
2739 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2744 base = fake_pool_base, literal_pool = true;
2746 /* Mark up the displacement. */
2747 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2748 UNSPEC_LTREL_OFFSET);
2751 /* Validate base register. */
2754 if (GET_CODE (base) == UNSPEC)
2755 switch (XINT (base, 1))
2759 disp = gen_rtx_UNSPEC (Pmode,
2760 gen_rtvec (1, XVECEXP (base, 0, 0)),
2761 UNSPEC_LTREL_OFFSET);
2765 base = XVECEXP (base, 0, 1);
2768 case UNSPEC_LTREL_BASE:
2769 if (XVECLEN (base, 0) == 1)
2770 base = fake_pool_base, literal_pool = true;
2772 base = XVECEXP (base, 0, 1);
2779 if (!REG_P (base) || GET_MODE (base) != Pmode)
2782 if (REGNO (base) == STACK_POINTER_REGNUM
2783 || REGNO (base) == FRAME_POINTER_REGNUM
2784 || ((reload_completed || reload_in_progress)
2785 && frame_pointer_needed
2786 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2787 || REGNO (base) == ARG_POINTER_REGNUM
2789 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2790 pointer = base_ptr = true;
2792 if ((reload_completed || reload_in_progress)
2793 && base == cfun->machine->base_reg)
2794 pointer = base_ptr = literal_pool = true;
2797 /* Validate index register. */
2800 if (GET_CODE (indx) == UNSPEC)
2801 switch (XINT (indx, 1))
2805 disp = gen_rtx_UNSPEC (Pmode,
2806 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2807 UNSPEC_LTREL_OFFSET);
2811 indx = XVECEXP (indx, 0, 1);
2814 case UNSPEC_LTREL_BASE:
2815 if (XVECLEN (indx, 0) == 1)
2816 indx = fake_pool_base, literal_pool = true;
2818 indx = XVECEXP (indx, 0, 1);
2825 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2828 if (REGNO (indx) == STACK_POINTER_REGNUM
2829 || REGNO (indx) == FRAME_POINTER_REGNUM
2830 || ((reload_completed || reload_in_progress)
2831 && frame_pointer_needed
2832 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2833 || REGNO (indx) == ARG_POINTER_REGNUM
2835 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2836 pointer = indx_ptr = true;
2838 if ((reload_completed || reload_in_progress)
2839 && indx == cfun->machine->base_reg)
2840 pointer = indx_ptr = literal_pool = true;
2843 /* Prefer to use pointer as base, not index. */
2844 if (base && indx && !base_ptr
2845 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2852 /* Validate displacement. */
2855 /* If virtual registers are involved, the displacement will change later
2856 anyway as the virtual registers get eliminated. This could make a
2857 valid displacement invalid, but it is more likely to make an invalid
2858 displacement valid, because we sometimes access the register save area
2859 via negative offsets to one of those registers.
2860 Thus we don't check the displacement for validity here. If after
2861 elimination the displacement turns out to be invalid after all,
2862 this is fixed up by reload in any case. */
2863 /* LRA always keeps displacements up to date, and we need to know
2864 that the displacement is right throughout LRA, not only at the
2865 final elimination. */
2867 || (base != arg_pointer_rtx
2868 && indx != arg_pointer_rtx
2869 && base != return_address_pointer_rtx
2870 && indx != return_address_pointer_rtx
2871 && base != frame_pointer_rtx
2872 && indx != frame_pointer_rtx
2873 && base != virtual_stack_vars_rtx
2874 && indx != virtual_stack_vars_rtx))
2875 if (!DISP_IN_RANGE (offset))
2880 /* All the special cases are pointers. */
2883 /* In the small-PIC case, the linker converts @GOT
2884 and @GOTNTPOFF offsets to possible displacements. */
2885 if (GET_CODE (disp) == UNSPEC
2886 && (XINT (disp, 1) == UNSPEC_GOT
2887 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2893 /* Accept pool label offsets. */
2894 else if (GET_CODE (disp) == UNSPEC
2895 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2898 /* Accept literal pool references. */
2899 else if (GET_CODE (disp) == UNSPEC
2900 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2902 /* In case CSE pulled a non-literal-pool reference out of
2903 the pool we have to reject the address. This is
2904 especially important when loading the GOT pointer on
2905 non-zarch CPUs. In this case the literal pool contains an
2906 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2907 will most likely exceed the displacement range. */
2908 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2909 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2912 orig_disp = gen_rtx_CONST (Pmode, disp);
2915 /* If we have an offset, make sure it does not
2916 exceed the size of the constant pool entry. */
2917 rtx sym = XVECEXP (disp, 0, 0);
2918 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2921 orig_disp = plus_constant (Pmode, orig_disp, offset);
2936 out->disp = orig_disp;
2937 out->pointer = pointer;
2938 out->literal_pool = literal_pool;
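/* Illustrative use (hypothetical RTL): decomposing a canonical
   base + index + displacement address.  */
#if 0
static void
example_decompose_address (void)
{
  struct s390_address parts;
  rtx addr = gen_rtx_PLUS (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_REG (Pmode, 5),
					 gen_rtx_REG (Pmode, 6)),
			   GEN_INT (100));
  if (s390_decompose_address (addr, &parts))
    {
      /* parts.indx == (reg 5), parts.base == (reg 6),
	 parts.disp == (const_int 100).  */
    }
}
#endif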
2944 /* Decompose a RTL expression OP for an address style operand into its
2945 components, and return the base register in BASE and the offset in
2946 OFFSET. While OP looks like an address it is never supposed to be used as such.
2949 Return true if OP is a valid address operand, false if not. */
2952 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
2953 HOST_WIDE_INT *offset)
2957 /* We can have an integer constant, an address register,
2958 or a sum of the two. */
2959 if (CONST_SCALAR_INT_P (op))
2964 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
2969 while (op && GET_CODE (op) == SUBREG)
2970 op = SUBREG_REG (op);
2972 if (op && GET_CODE (op) != REG)
2977 if (off == NULL_RTX)
2979 else if (CONST_INT_P (off))
2980 *offset = INTVAL (off);
2981 else if (CONST_WIDE_INT_P (off))
2982 /* The offset will be truncated to 12 bits anyway, so just take
2983 the lowest-order chunk of the wide int. */
2984 *offset = CONST_WIDE_INT_ELT (off, 0);
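/* Illustrative use (hypothetical RTL): a shift count of the form
   (plus (reg 5) (const_int 7)) decomposes into BASE = (reg 5) and
   OFFSET = 7; a bare (const_int 11) yields a NULL base.  */
#if 0
static void
example_decompose_shift_count (void)
{
  rtx base;
  HOST_WIDE_INT off;
  rtx op = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 5), GEN_INT (7));
  if (s390_decompose_addrstyle_without_index (op, &base, &off))
    {
      /* base == (reg 5), off == 7.  */
    }
}
#endif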
2995 /* Return true if OP is a valid address without index. */
2998 s390_legitimate_address_without_index_p (rtx op)
3000 struct s390_address addr;
3002 if (!s390_decompose_address (XEXP (op, 0), &addr))
3011 /* Return TRUE if ADDR is an operand valid for a load/store relative
3012 instruction. Be aware that the alignment of the operand needs to
3013 be checked separately.
3014 Valid addresses are single references or a sum of a reference and a
3015 constant integer. Return these parts in SYMREF and ADDEND. You can
3016 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3017 values. Literal pool references are *not* considered symbol references. */
3021 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3023 HOST_WIDE_INT tmpaddend = 0;
3025 if (GET_CODE (addr) == CONST)
3026 addr = XEXP (addr, 0);
3028 if (GET_CODE (addr) == PLUS)
3030 if (!CONST_INT_P (XEXP (addr, 1)))
3033 tmpaddend = INTVAL (XEXP (addr, 1));
3034 addr = XEXP (addr, 0);
3037 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3038 || (GET_CODE (addr) == UNSPEC
3039 && (XINT (addr, 1) == UNSPEC_GOTENT
3040 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3045 *addend = tmpaddend;
3052 /* Return true if the address in OP is valid for constraint letter C
3053 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3054 pool MEMs should be accepted. Only the Q, R, S, T constraint
3055 letters are allowed for C. */
3058 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3060 struct s390_address addr;
3061 bool decomposed = false;
3063 /* This check makes sure that no symbolic addresses (except literal
3064 pool references) are accepted by the R or T constraints. */
3065 if (s390_loadrelative_operand_p (op, NULL, NULL))
3068 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3071 if (!s390_decompose_address (op, &addr))
3073 if (addr.literal_pool)
3078 /* With reload, we sometimes get intermediate address forms that are
3079 actually invalid as-is, but we need to accept them in the most
3080 generic cases below ('R' or 'T'), since reload will in fact fix
3081 them up. LRA behaves differently here; we never see such forms,
3082 but on the other hand, we need to strictly reject every invalid
3083 address form. Perform this check right up front. */
3084 if (lra_in_progress)
3086 if (!decomposed && !s390_decompose_address (op, &addr))
3093 case 'Q': /* no index short displacement */
3094 if (!decomposed && !s390_decompose_address (op, &addr))
3098 if (!s390_short_displacement (addr.disp))
3102 case 'R': /* with index short displacement */
3103 if (TARGET_LONG_DISPLACEMENT)
3105 if (!decomposed && !s390_decompose_address (op, &addr))
3107 if (!s390_short_displacement (addr.disp))
3110 /* Any invalid address here will be fixed up by reload,
3111 so accept it for the most generic constraint. */
3114 case 'S': /* no index long displacement */
3115 if (!decomposed && !s390_decompose_address (op, &addr))
3121 case 'T': /* with index long displacement */
3122 /* Any invalid address here will be fixed up by reload,
3123 so accept it for the most generic constraint. */
3133 /* Evaluates constraint strings described by the regular expression
3134 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3135 the constraint given in STR, and 0 otherwise. */
3138 s390_mem_constraint (const char *str, rtx op)
3145 /* Check for offsettable variants of memory constraints. */
3146 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3148 if ((reload_completed || reload_in_progress)
3149 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3151 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3153 /* Check for non-literal-pool variants of memory constraints. */
3156 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3161 if (GET_CODE (op) != MEM)
3163 return s390_check_qrst_address (c, XEXP (op, 0), true);
3165 /* Simply check for the basic form of a shift count. Reload will
3166 take care of making sure we have a proper base register. */
3167 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3171 return s390_check_qrst_address (str[1], op, true);
3179 /* Evaluates constraint strings starting with letter O. Input
3180 parameter C is the second letter following the "O" in the constraint
3181 string. Returns 1 if VALUE meets the respective constraint and 0 otherwise. */
3185 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3193 return trunc_int_for_mode (value, SImode) == value;
3197 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3200 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3208 /* Evaluates constraint strings starting with letter N. Parameter STR
3209 contains the letters following letter "N" in the constraint string.
3210 Returns true if VALUE matches the constraint. */
3213 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3215 machine_mode mode, part_mode;
3217 int part, part_goal;
3223 part_goal = str[0] - '0';
3267 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3270 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3273 if (part_goal != -1 && part_goal != part)
3280 /* Returns true if the input parameter VALUE is a float zero. */
3283 s390_float_const_zero_p (rtx value)
3285 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3286 && value == CONST0_RTX (GET_MODE (value)));
3289 /* Implement TARGET_REGISTER_MOVE_COST. */
3292 s390_register_move_cost (machine_mode mode,
3293 reg_class_t from, reg_class_t to)
3295 /* On s390, copying between FPRs and GPRs is expensive. */
3297 /* It becomes somewhat faster when ldgr/lgdr are available. */
3298 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3300 /* ldgr is single cycle. */
3301 if (reg_classes_intersect_p (from, GENERAL_REGS)
3302 && reg_classes_intersect_p (to, FP_REGS))
3304 /* lgdr needs 3 cycles. */
3305 if (reg_classes_intersect_p (to, GENERAL_REGS)
3306 && reg_classes_intersect_p (from, FP_REGS))
3310 /* Otherwise copying is done via memory. */
3311 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3312 && reg_classes_intersect_p (to, FP_REGS))
3313 || (reg_classes_intersect_p (from, FP_REGS)
3314 && reg_classes_intersect_p (to, GENERAL_REGS)))
3320 /* Implement TARGET_MEMORY_MOVE_COST. */
3323 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3324 reg_class_t rclass ATTRIBUTE_UNUSED,
3325 bool in ATTRIBUTE_UNUSED)
3330 /* Compute a (partial) cost for rtx X. Return true if the complete
3331 cost has been computed, and false if subexpressions should be
3332 scanned. In either case, *TOTAL contains the cost result. The
3333 initial value of *TOTAL is the default value computed by
3334 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3335 code of the superexpression of x. */
3338 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3339 int opno ATTRIBUTE_UNUSED,
3340 int *total, bool speed ATTRIBUTE_UNUSED)
3342 int code = GET_CODE (x);
3350 case CONST_WIDE_INT:
3357 if (GET_CODE (XEXP (x, 0)) == AND
3358 && GET_CODE (XEXP (x, 1)) == ASHIFT
3359 && REG_P (XEXP (XEXP (x, 0), 0))
3360 && REG_P (XEXP (XEXP (x, 1), 0))
3361 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3362 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3363 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3364 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3366 *total = COSTS_N_INSNS (2);
3379 *total = COSTS_N_INSNS (1);
3384 *total = COSTS_N_INSNS (1);
3392 rtx left = XEXP (x, 0);
3393 rtx right = XEXP (x, 1);
3394 if (GET_CODE (right) == CONST_INT
3395 && CONST_OK_FOR_K (INTVAL (right)))
3396 *total = s390_cost->mhi;
3397 else if (GET_CODE (left) == SIGN_EXTEND)
3398 *total = s390_cost->mh;
3400 *total = s390_cost->ms; /* msr, ms, msy */
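/* Worked example (illustrative): for (mult:SI (reg) (const_int 3))
   the constant satisfies CONST_OK_FOR_K, so the mhi cost is used;
   a register-register SImode multiply falls through to the ms cost
   (msr, ms, msy) chosen just above.  */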
3405 rtx left = XEXP (x, 0);
3406 rtx right = XEXP (x, 1);
3409 if (GET_CODE (right) == CONST_INT
3410 && CONST_OK_FOR_K (INTVAL (right)))
3411 *total = s390_cost->mghi;
3412 else if (GET_CODE (left) == SIGN_EXTEND)
3413 *total = s390_cost->msgf;
3415 *total = s390_cost->msg; /* msgr, msg */
3417 else /* TARGET_31BIT */
3419 if (GET_CODE (left) == SIGN_EXTEND
3420 && GET_CODE (right) == SIGN_EXTEND)
3421 /* mulsidi case: mr, m */
3422 *total = s390_cost->m;
3423 else if (GET_CODE (left) == ZERO_EXTEND
3424 && GET_CODE (right) == ZERO_EXTEND
3425 && TARGET_CPU_ZARCH)
3426 /* umulsidi case: ml, mlr */
3427 *total = s390_cost->ml;
3429 /* Complex calculation is required. */
3430 *total = COSTS_N_INSNS (40);
3436 *total = s390_cost->mult_df;
3439 *total = s390_cost->mxbr;
3450 *total = s390_cost->madbr;
3453 *total = s390_cost->maebr;
3458 /* Negation of the third argument is free: FMSUB. */
3459 if (GET_CODE (XEXP (x, 2)) == NEG)
3461 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3462 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3463 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3470 if (mode == TImode) /* 128 bit division */
3471 *total = s390_cost->dlgr;
3472 else if (mode == DImode)
3474 rtx right = XEXP (x, 1);
3475 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3476 *total = s390_cost->dlr;
3477 else /* 64 by 64 bit division */
3478 *total = s390_cost->dlgr;
3480 else if (mode == SImode) /* 32 bit division */
3481 *total = s390_cost->dlr;
3488 rtx right = XEXP (x, 1);
3489 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3491 *total = s390_cost->dsgfr;
3493 *total = s390_cost->dr;
3494 else /* 64 by 64 bit division */
3495 *total = s390_cost->dsgr;
3497 else if (mode == SImode) /* 32 bit division */
3498 *total = s390_cost->dlr;
3499 else if (mode == SFmode)
3501 *total = s390_cost->debr;
3503 else if (mode == DFmode)
3505 *total = s390_cost->ddbr;
3507 else if (mode == TFmode)
3509 *total = s390_cost->dxbr;
3515 *total = s390_cost->sqebr;
3516 else if (mode == DFmode)
3517 *total = s390_cost->sqdbr;
3519 *total = s390_cost->sqxbr;
3524 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3525 || outer_code == PLUS || outer_code == MINUS
3526 || outer_code == COMPARE)
3531 *total = COSTS_N_INSNS (1);
3532 if (GET_CODE (XEXP (x, 0)) == AND
3533 && GET_CODE (XEXP (x, 1)) == CONST_INT
3534 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3536 rtx op0 = XEXP (XEXP (x, 0), 0);
3537 rtx op1 = XEXP (XEXP (x, 0), 1);
3538 rtx op2 = XEXP (x, 1);
3540 if (memory_operand (op0, GET_MODE (op0))
3541 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3543 if (register_operand (op0, GET_MODE (op0))
3544 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3554 /* Return the cost of an address rtx ADDR. */
3557 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3558 addr_space_t as ATTRIBUTE_UNUSED,
3559 bool speed ATTRIBUTE_UNUSED)
3561 struct s390_address ad;
3562 if (!s390_decompose_address (addr, &ad))
3565 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3568 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3570 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3572 int misalign ATTRIBUTE_UNUSED)
3574 switch (type_of_cost)
3584 case cond_branch_not_taken:
3586 case vec_promote_demote:
3587 case unaligned_load:
3588 case unaligned_store:
3591 case cond_branch_taken:
3595 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3602 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3603 otherwise return 0. */
3606 tls_symbolic_operand (rtx op)
3608 if (GET_CODE (op) != SYMBOL_REF)
3610 return SYMBOL_REF_TLS_MODEL (op);
3613 /* Split DImode access register reference REG (on 64-bit) into its constituent
3614 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3615 gen_highpart cannot be used as they assume all registers are word-sized,
3616 while our access registers have only half that size. */
3619 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3621 gcc_assert (TARGET_64BIT);
3622 gcc_assert (ACCESS_REG_P (reg));
3623 gcc_assert (GET_MODE (reg) == DImode);
3624 gcc_assert (!(REGNO (reg) & 1));
3626 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3627 *hi = gen_rtx_REG (SImode, REGNO (reg));
3630 /* Return true if OP contains a symbol reference. */
3633 symbolic_reference_mentioned_p (rtx op)
3638 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3641 fmt = GET_RTX_FORMAT (GET_CODE (op));
3642 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3648 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3649 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3653 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3660 /* Return true if OP contains a reference to a thread-local symbol. */
3663 tls_symbolic_reference_mentioned_p (rtx op)
3668 if (GET_CODE (op) == SYMBOL_REF)
3669 return tls_symbolic_operand (op);
3671 fmt = GET_RTX_FORMAT (GET_CODE (op));
3672 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3678 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3679 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3683 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3691 /* Return true if OP is a legitimate general operand when
3692 generating PIC code. It is given that flag_pic is on
3693 and that OP satisfies CONSTANT_P. */
3696 legitimate_pic_operand_p (rtx op)
3698 /* Accept all non-symbolic constants. */
3699 if (!SYMBOLIC_CONST (op))
3702 /* Reject everything else; must be handled
3703 via emit_symbolic_move. */
3707 /* Returns true if the constant value OP is a legitimate general operand.
3708 It is given that OP satisfies CONSTANT_P. */
3711 s390_legitimate_constant_p (machine_mode mode, rtx op)
3713 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3715 if (GET_MODE_SIZE (mode) != 16)
3718 if (!satisfies_constraint_j00 (op)
3719 && !satisfies_constraint_jm1 (op)
3720 && !satisfies_constraint_jKK (op)
3721 && !satisfies_constraint_jxx (op)
3722 && !satisfies_constraint_jyy (op))
3726 /* Accept all non-symbolic constants. */
3727 if (!SYMBOLIC_CONST (op))
3730 /* Accept immediate LARL operands. */
3731 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3734 /* Thread-local symbols are never legal constants. This is
3735 so that emit_call knows that computing such addresses
3736 might require a function call. */
3737 if (TLS_SYMBOLIC_CONST (op))
3740 /* In the PIC case, symbolic constants must *not* be
3741 forced into the literal pool. We accept them here,
3742 so that they will be handled by emit_symbolic_move. */
3746 /* All remaining non-PIC symbolic constants are
3747 forced into the literal pool. */
3751 /* Determine if it's legal to put X into the constant pool. This
3752 is not possible if X contains the address of a symbol that is
3753 not constant (TLS) or not known at final link time (PIC). */
3756 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3758 switch (GET_CODE (x))
3762 case CONST_WIDE_INT:
3764 /* Accept all non-symbolic constants. */
3768 /* Labels are OK iff we are non-PIC. */
3769 return flag_pic != 0;
3772 /* 'Naked' TLS symbol references are never OK,
3773 non-TLS symbols are OK iff we are non-PIC. */
3774 if (tls_symbolic_operand (x))
3777 return flag_pic != 0;
3780 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3783 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3784 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3787 switch (XINT (x, 1))
3789 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3790 case UNSPEC_LTREL_OFFSET:
3798 case UNSPEC_GOTNTPOFF:
3799 case UNSPEC_INDNTPOFF:
3802 /* If the literal pool shares the code section, execute template
3803 placeholders can be put into the pool as well. */
3805 return TARGET_CPU_ZARCH;
3817 /* Returns true if the constant value OP is a legitimate general
3818 operand during and after reload. The difference to
3819 legitimate_constant_p is that this function will not accept
3820 a constant that would need to be forced to the literal pool
3821 before it can be used as operand.
3822 This function accepts all constants which can be loaded directly into a GPR. */
3826 legitimate_reload_constant_p (rtx op)
3828 /* Accept la(y) operands. */
3829 if (GET_CODE (op) == CONST_INT
3830 && DISP_IN_RANGE (INTVAL (op)))
3833 /* Accept l(g)hi/l(g)fi operands. */
3834 if (GET_CODE (op) == CONST_INT
3835 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3838 /* Accept lliXX operands. */
3840 && GET_CODE (op) == CONST_INT
3841 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3842 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3846 && GET_CODE (op) == CONST_INT
3847 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3848 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3851 /* Accept larl operands. */
3852 if (TARGET_CPU_ZARCH
3853 && larl_operand (op, VOIDmode))
3856 /* Accept floating-point zero operands that fit into a single GPR. */
3857 if (GET_CODE (op) == CONST_DOUBLE
3858 && s390_float_const_zero_p (op)
3859 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3862 /* Accept double-word operands that can be split. */
3863 if (GET_CODE (op) == CONST_WIDE_INT
3864 || (GET_CODE (op) == CONST_INT
3865 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3867 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3868 rtx hi = operand_subword (op, 0, 0, dword_mode);
3869 rtx lo = operand_subword (op, 1, 0, dword_mode);
3870 return legitimate_reload_constant_p (hi)
3871 && legitimate_reload_constant_p (lo);
3874 /* Everything else cannot be handled without reload. */
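/* Worked example (illustrative): on a 31-bit target the DImode
   constant 0x0001000200030004 is split by the code above into the
   SImode words 0x00010002 and 0x00030004; each word is itself a
   legitimate reload constant, so the double-word value is accepted
   without a literal pool slot.  */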
3878 /* Returns true if the constant value OP is a legitimate fp operand
3879 during and after reload.
3880 This function accepts all constants which can be loaded directly into an FPR. */
3884 legitimate_reload_fp_constant_p (rtx op)
3886 /* Accept floating-point zero operands if the load zero instruction
3887 can be used. Prior to z196 the load fp zero instruction caused a
3888 performance penalty if the result is used as a BFP number. */
3890 && GET_CODE (op) == CONST_DOUBLE
3891 && s390_float_const_zero_p (op))
3897 /* Returns true if the constant value OP is a legitimate vector operand
3898 during and after reload.
3899 This function accepts all constants which can be loaded directly into a VR. */
3903 legitimate_reload_vector_constant_p (rtx op)
3905 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3906 && (satisfies_constraint_j00 (op)
3907 || satisfies_constraint_jm1 (op)
3908 || satisfies_constraint_jKK (op)
3909 || satisfies_constraint_jxx (op)
3910 || satisfies_constraint_jyy (op)))
3916 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3917 return the class of reg to actually use. */
3920 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3922 switch (GET_CODE (op))
3924 /* Constants we cannot reload into general registers
3925 must be forced into the literal pool. */
3929 case CONST_WIDE_INT:
3930 if (reg_class_subset_p (GENERAL_REGS, rclass)
3931 && legitimate_reload_constant_p (op))
3932 return GENERAL_REGS;
3933 else if (reg_class_subset_p (ADDR_REGS, rclass)
3934 && legitimate_reload_constant_p (op))
3936 else if (reg_class_subset_p (FP_REGS, rclass)
3937 && legitimate_reload_fp_constant_p (op))
3939 else if (reg_class_subset_p (VEC_REGS, rclass)
3940 && legitimate_reload_vector_constant_p (op))
3945 /* If a symbolic constant or a PLUS is reloaded,
3946 it is most likely being used as an address, so
3947 prefer ADDR_REGS. If RCLASS is not a superset
3948 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3950 /* Symrefs cannot be pushed into the literal pool with -fPIC
3951 so we *MUST NOT* return NO_REGS for these cases
3952 (s390_cannot_force_const_mem will return true).
3954 On the other hand we MUST return NO_REGS for symrefs with
3955 invalid addend which might have been pushed to the literal
3956 pool (no -fPIC). Usually we would expect them to be
3957 handled via secondary reload but this does not happen if
3958 they are used as literal pool slot replacement in reload
3959 inheritance (see emit_input_reload_insns). */
3960 if (TARGET_CPU_ZARCH
3961 && GET_CODE (XEXP (op, 0)) == PLUS
3962 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3963 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3965 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3973 if (!legitimate_reload_constant_p (op))
3977 /* load address will be used. */
3978 if (reg_class_subset_p (ADDR_REGS, rclass))
3990 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3991 multiple of ALIGNMENT and the SYMBOL_REF being naturally aligned. */
3995 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3997 HOST_WIDE_INT addend;
4000 /* The "required alignment" might be 0 (e.g. for certain structs
4001 accessed via BLKmode). Early abort in this case, as well as when
4002 an alignment > 8 is required. */
4003 if (alignment < 2 || alignment > 8)
4006 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4009 if (addend & (alignment - 1))
4012 if (GET_CODE (symref) == SYMBOL_REF)
4014 /* We have load-relative instructions for 2-byte, 4-byte, and
4015 8-byte alignment so allow only these. */
4018 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4019 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4020 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4021 default: return false;
4025 if (GET_CODE (symref) == UNSPEC
4026 && alignment <= UNITS_PER_LONG)
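/* Illustrative use (hypothetical RTL): SYMBOL_REF + 6 passes the
   addend test for 2-byte alignment (6 & 1 == 0) but fails it for
   4-byte alignment (6 & 3 != 0).  */
#if 0
static void
example_symref_alignment (void)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "some_symbol");
  rtx addr = gen_rtx_CONST (Pmode,
			    gen_rtx_PLUS (Pmode, sym, GEN_INT (6)));
  s390_check_symref_alignment (addr, 2); /* true unless NOTALIGN2 is set */
  s390_check_symref_alignment (addr, 4); /* false: addend & 3 != 0 */
}
#endif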
4032 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4033 operand SCRATCH is used to reload the even part of the address and the odd part is then added using la. */
4037 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4039 HOST_WIDE_INT addend;
4042 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4046 /* Easy case. The addend is even so larl will do fine. */
4047 emit_move_insn (reg, addr);
4050 /* We can leave the scratch register untouched if the target
4051 register is a valid base register. */
4052 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4053 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4056 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4057 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4060 emit_move_insn (scratch,
4061 gen_rtx_CONST (Pmode,
4062 gen_rtx_PLUS (Pmode, symref,
4063 GEN_INT (addend - 1))));
4065 emit_move_insn (scratch, symref);
4067 /* Increment the address using la in order to avoid clobbering cc. */
4068 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
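/* Worked example (illustrative): reloading SYM + 5, which larl cannot
   encode.  The code above emits "larl scratch,SYM+4" followed by
   "la reg,1(scratch)", supplying the odd remainder without touching
   the condition code.  */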
4072 /* Generate what is necessary to move between REG and MEM using
4073 SCRATCH. The direction is given by TOMEM. */
4076 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4078 /* Reload might have pulled a constant out of the literal pool.
4079 Force it back in. */
4080 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4081 || GET_CODE (mem) == CONST_WIDE_INT
4082 || GET_CODE (mem) == CONST_VECTOR
4083 || GET_CODE (mem) == CONST)
4084 mem = force_const_mem (GET_MODE (reg), mem);
4086 gcc_assert (MEM_P (mem));
4088 /* For a load from memory we can leave the scratch register
4089 untouched if the target register is a valid base register. */
4091 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4092 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4093 && GET_MODE (reg) == GET_MODE (scratch))
4096 /* Load address into scratch register. Since we can't have a
4097 secondary reload for a secondary reload we have to cover the case
4098 where larl would need a secondary reload here as well. */
4099 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4101 /* Now we can use a standard load/store to do the move. */
4103 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4105 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4108 /* Inform reload about cases where moving X with a mode MODE to a register in
4109 RCLASS requires an extra scratch or immediate register. Return the class
4110 needed for the immediate register. */
4113 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4114 machine_mode mode, secondary_reload_info *sri)
4116 enum reg_class rclass = (enum reg_class) rclass_i;
4118 /* Intermediate register needed. */
4119 if (reg_classes_intersect_p (CC_REGS, rclass))
4120 return GENERAL_REGS;
4124 /* The vst/vl vector move instructions allow only for short displacements. */
4127 && GET_CODE (XEXP (x, 0)) == PLUS
4128 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4129 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4130 && reg_class_subset_p (rclass, VEC_REGS)
4131 && (!reg_class_subset_p (rclass, FP_REGS)
4132 || (GET_MODE_SIZE (mode) > 8
4133 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4136 sri->icode = (TARGET_64BIT ?
4137 CODE_FOR_reloaddi_la_in :
4138 CODE_FOR_reloadsi_la_in);
4140 sri->icode = (TARGET_64BIT ?
4141 CODE_FOR_reloaddi_la_out :
4142 CODE_FOR_reloadsi_la_out);
4148 HOST_WIDE_INT offset;
4151 /* On z10 several optimizer steps may generate larl operands with an odd addend. */
4154 && s390_loadrelative_operand_p (x, &symref, &offset)
4156 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4157 && (offset & 1) == 1)
4158 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4159 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4161 /* Handle all the (mem (symref)) accesses we cannot use the z10
4162 instructions for. */
4164 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4166 || !reg_class_subset_p (rclass, GENERAL_REGS)
4167 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4168 || !s390_check_symref_alignment (XEXP (x, 0),
4169 GET_MODE_SIZE (mode))))
4171 #define __SECONDARY_RELOAD_CASE(M,m) \
4174 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4175 CODE_FOR_reload##m##di_tomem_z10; \
4177 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4178 CODE_FOR_reload##m##si_tomem_z10; \
4181 switch (GET_MODE (x))
4183 __SECONDARY_RELOAD_CASE (QI, qi);
4184 __SECONDARY_RELOAD_CASE (HI, hi);
4185 __SECONDARY_RELOAD_CASE (SI, si);
4186 __SECONDARY_RELOAD_CASE (DI, di);
4187 __SECONDARY_RELOAD_CASE (TI, ti);
4188 __SECONDARY_RELOAD_CASE (SF, sf);
4189 __SECONDARY_RELOAD_CASE (DF, df);
4190 __SECONDARY_RELOAD_CASE (TF, tf);
4191 __SECONDARY_RELOAD_CASE (SD, sd);
4192 __SECONDARY_RELOAD_CASE (DD, dd);
4193 __SECONDARY_RELOAD_CASE (TD, td);
4194 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4195 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4196 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4197 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4198 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4199 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4200 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4201 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4202 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4203 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4204 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4205 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4206 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4207 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4208 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4209 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4210 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4211 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4212 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4213 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4214 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4218 #undef __SECONDARY_RELOAD_CASE
4222 /* We need a scratch register when loading a PLUS expression which
4223 is not a legitimate operand of the LOAD ADDRESS instruction. */
4224 /* LRA can deal with the transformation of a PLUS operand very well -- so
4225 we don't need to prompt it in this case. */
4226 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4227 sri->icode = (TARGET_64BIT ?
4228 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4230 /* When performing a multiword move from or to memory we have to make sure the
4231 second chunk in memory is addressable without causing a displacement
4232 overflow. If that would be the case we calculate the address in
4233 a scratch register. */
4235 && GET_CODE (XEXP (x, 0)) == PLUS
4236 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4237 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4238 + GET_MODE_SIZE (mode) - 1))
4240 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4241 in an s_operand address, since we may fall back to lm/stm. So we only
4242 have to care about overflows in the b+i+d case. */
4243 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4244 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4245 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4246 /* For FP_REGS no lm/stm is available so this check is triggered
4247 for displacement overflows in b+i+d and b+d like addresses. */
4248 || (reg_classes_intersect_p (FP_REGS, rclass)
4249 && s390_class_max_nregs (FP_REGS, mode) > 1))
4252 sri->icode = (TARGET_64BIT ?
4253 CODE_FOR_reloaddi_la_in :
4254 CODE_FOR_reloadsi_la_in);
4256 sri->icode = (TARGET_64BIT ?
4257 CODE_FOR_reloaddi_la_out :
4258 CODE_FOR_reloadsi_la_out);
4262 /* A scratch address register is needed when a symbolic constant is
4263 copied to r0 when compiling with -fPIC. In other cases the target
4264 register might be used as temporary (see legitimize_pic_address). */
4265 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4266 sri->icode = (TARGET_64BIT ?
4267 CODE_FOR_reloaddi_PIC_addr :
4268 CODE_FOR_reloadsi_PIC_addr);
4270 /* Either scratch or no register needed. */
4274 /* Generate code to load SRC, which is a PLUS that is not a
4275 legitimate operand for the LA instruction, into TARGET.
4276 SCRATCH may be used as scratch register. */
4279 s390_expand_plus_operand (rtx target, rtx src,
4283 struct s390_address ad;
4285 /* src must be a PLUS; get its two operands. */
4286 gcc_assert (GET_CODE (src) == PLUS);
4287 gcc_assert (GET_MODE (src) == Pmode);
4289 /* Check if any of the two operands is already scheduled
4290 for replacement by reload. This can happen e.g. when
4291 float registers occur in an address. */
4292 sum1 = find_replacement (&XEXP (src, 0));
4293 sum2 = find_replacement (&XEXP (src, 1));
4294 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4296 /* If the address is already strictly valid, there's nothing to do. */
4297 if (!s390_decompose_address (src, &ad)
4298 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4299 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4301 /* Otherwise, one of the operands cannot be an address register;
4302 we reload its value into the scratch register. */
4303 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4305 emit_move_insn (scratch, sum1);
4308 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4310 emit_move_insn (scratch, sum2);
4314 /* According to the way these invalid addresses are generated
4315 in reload.c, it should never happen (at least on s390) that
4316 *neither* of the PLUS components, after find_replacement
4317 was applied, is an address register. */
4318 if (sum1 == scratch && sum2 == scratch)
4324 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4327 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4328 is only ever performed on addresses, so we can mark the
4329 sum as legitimate for LA in any case. */
4330 s390_load_address (target, src);
4334 /* Return true if ADDR is a valid memory address.
4335 STRICT specifies whether strict register checking applies. */
4338 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4340 struct s390_address ad;
4343 && larl_operand (addr, VOIDmode)
4344 && (mode == VOIDmode
4345 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4348 if (!s390_decompose_address (addr, &ad))
4353 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4356 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4362 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4363 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4367 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4368 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4374 /* Return true if OP is a valid operand for the LA instruction.
4375 In 31-bit, we need to prove that the result is used as an
4376 address, as LA performs only a 31-bit addition. */
4379 legitimate_la_operand_p (rtx op)
4381 struct s390_address addr;
4382 if (!s390_decompose_address (op, &addr))
4385 return (TARGET_64BIT || addr.pointer);
4388 /* Return true if it is valid *and* preferable to use LA to
4389 compute the sum of OP1 and OP2. */
4392 preferred_la_operand_p (rtx op1, rtx op2)
4394 struct s390_address addr;
4396 if (op2 != const0_rtx)
4397 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4399 if (!s390_decompose_address (op1, &addr))
4401 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4403 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4406 /* Avoid LA instructions with index register on z196; it is
4407 preferable to use regular add instructions when possible.
4408 Starting with zEC12 the la with index register is "uncracked" again. */
4410 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4413 if (!TARGET_64BIT && !addr.pointer)
4419 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4420 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4426 /* Emit a forced load-address operation to load SRC into DST.
4427 This will use the LOAD ADDRESS instruction even in situations
4428 where legitimate_la_operand_p (SRC) returns false. */
4431 s390_load_address (rtx dst, rtx src)
4434 emit_move_insn (dst, src);
4436 emit_insn (gen_force_la_31 (dst, src));
4439 /* Return a legitimate reference for ORIG (an address) using the
4440 register REG. If REG is 0, a new pseudo is generated.
4442 There are two types of references that must be handled:
4444 1. Global data references must load the address from the GOT, via
4445 the PIC reg. An insn is emitted to do this load, and the reg is
4448 2. Static data references, constant pool addresses, and code labels
4449 compute the address as an offset from the GOT, whose base is in
4450 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4451 differentiate them from global data objects. The returned
4452 address is the PIC reg + an unspec constant.
4454 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4455 reg also appears in the address. */
4458 legitimize_pic_address (rtx orig, rtx reg)
4461 rtx addend = const0_rtx;
4464 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4466 if (GET_CODE (addr) == CONST)
4467 addr = XEXP (addr, 0);
4469 if (GET_CODE (addr) == PLUS)
4471 addend = XEXP (addr, 1);
4472 addr = XEXP (addr, 0);
4475 if ((GET_CODE (addr) == LABEL_REF
4476 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4477 || (GET_CODE (addr) == UNSPEC &&
4478 (XINT (addr, 1) == UNSPEC_GOTENT
4479 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4480 && GET_CODE (addend) == CONST_INT)
4482 /* This can be locally addressed. */
4484 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4485 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4486 gen_rtx_CONST (Pmode, addr) : addr);
4488 if (TARGET_CPU_ZARCH
4489 && larl_operand (const_addr, VOIDmode)
4490 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4491 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4493 if (INTVAL (addend) & 1)
4495 /* LARL can't handle odd offsets, so emit a pair of LARL and LA. */
4497 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4499 if (!DISP_IN_RANGE (INTVAL (addend)))
4501 HOST_WIDE_INT even = INTVAL (addend) - 1;
4502 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4503 addr = gen_rtx_CONST (Pmode, addr);
4504 addend = const1_rtx;
4507 emit_move_insn (temp, addr);
4508 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4512 s390_load_address (reg, new_rtx);
4518 /* If the offset is even, we can just use LARL. This
4519 will happen automatically. */
4524 /* No larl - Access local symbols relative to the GOT. */
4526 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4528 if (reload_in_progress || reload_completed)
4529 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4531 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4532 if (addend != const0_rtx)
4533 addr = gen_rtx_PLUS (Pmode, addr, addend);
4534 addr = gen_rtx_CONST (Pmode, addr);
4535 addr = force_const_mem (Pmode, addr);
4536 emit_move_insn (temp, addr);
4538 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4541 s390_load_address (reg, new_rtx);
4546 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4548 /* A non-local symbol reference without addend.
4550 The symbol ref is wrapped into an UNSPEC to make sure the
4551 proper operand modifier (@GOT or @GOTENT) will be emitted.
4552 This will tell the linker to put the symbol into the GOT.
4554 Additionally the code dereferencing the GOT slot is emitted here.
4556 An addend to the symref needs to be added afterwards.
4557 legitimize_pic_address calls itself recursively to handle
4558 that case. So no need to do it here. */
4561 reg = gen_reg_rtx (Pmode);
4565 /* Use load relative if possible.
4566 lgrl <target>, sym@GOTENT */
4567 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4568 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4569 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4571 emit_move_insn (reg, new_rtx);
4574 else if (flag_pic == 1)
4576 /* Assume GOT offset is a valid displacement operand (< 4k
4577 or < 512k with z990). This is handled the same way in
4578 both 31- and 64-bit code (@GOT).
4579 lg <target>, sym@GOT(r12) */
4581 if (reload_in_progress || reload_completed)
4582 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4584 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4585 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4586 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4587 new_rtx = gen_const_mem (Pmode, new_rtx);
4588 emit_move_insn (reg, new_rtx);
4591 else if (TARGET_CPU_ZARCH)
4593 /* If the GOT offset might be >= 4k, we determine the position
4594 of the GOT entry via a PC-relative LARL (@GOTENT).
4595 larl temp, sym@GOTENT
4596 lg <target>, 0(temp) */
4598 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4600 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4601 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4605 emit_move_insn (temp, new_rtx);
4607 new_rtx = gen_const_mem (Pmode, temp);
4608 emit_move_insn (reg, new_rtx);
4614 /* If the GOT offset might be >= 4k, we have to load it
4615 from the literal pool (@GOT).
4617 lg temp, lit-litbase(r13)
4618 lg <target>, 0(temp)
4619 lit: .long sym@GOT */
4621 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4623 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4624 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4626 if (reload_in_progress || reload_completed)
4627 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4629 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4630 addr = gen_rtx_CONST (Pmode, addr);
4631 addr = force_const_mem (Pmode, addr);
4632 emit_move_insn (temp, addr);
4634 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4635 new_rtx = gen_const_mem (Pmode, new_rtx);
4636 emit_move_insn (reg, new_rtx);
4640 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4642 gcc_assert (XVECLEN (addr, 0) == 1);
4643 switch (XINT (addr, 1))
4645 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
4646 (not GOT slots!). In general this will exceed the
4647 displacement range, so these values belong in the literal pool. */
4651 new_rtx = force_const_mem (Pmode, orig);
4654 /* For -fPIC the GOT size might exceed the displacement
4655 range so make sure the value is in the literal pool. */
4658 new_rtx = force_const_mem (Pmode, orig);
4661 /* For @GOTENT larl is used. This is handled like local symbol refs. */
4667 /* @PLT is OK as is on 64-bit, must be converted to
4668 GOT-relative @PLTOFF on 31-bit. */
4670 if (!TARGET_CPU_ZARCH)
4672 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4674 if (reload_in_progress || reload_completed)
4675 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4677 addr = XVECEXP (addr, 0, 0);
4678 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4680 if (addend != const0_rtx)
4681 addr = gen_rtx_PLUS (Pmode, addr, addend);
4682 addr = gen_rtx_CONST (Pmode, addr);
4683 addr = force_const_mem (Pmode, addr);
4684 emit_move_insn (temp, addr);
4686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4689 s390_load_address (reg, new_rtx);
4694 /* On 64 bit larl can be used. This case is handled like
4695 local symbol refs. */
4699 /* Everything else cannot happen. */
4704 else if (addend != const0_rtx)
4706 /* Otherwise, compute the sum. */
4708 rtx base = legitimize_pic_address (addr, reg);
4709 new_rtx = legitimize_pic_address (addend,
4710 base == reg ? NULL_RTX : reg);
4711 if (GET_CODE (new_rtx) == CONST_INT)
4712 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4715 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4717 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4718 new_rtx = XEXP (new_rtx, 1);
4720 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4723 if (GET_CODE (new_rtx) == CONST)
4724 new_rtx = XEXP (new_rtx, 0);
4725 new_rtx = force_operand (new_rtx, 0);
4731 /* Load the thread pointer into a register. */
4734 s390_get_thread_pointer (void)
4736 rtx tp = gen_reg_rtx (Pmode);
4738 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4739 mark_reg_pointer (tp, BITS_PER_WORD);
4744 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4745 in s390_tls_symbol which always refers to __tls_get_offset.
4746 The returned offset is written to RESULT_REG and a USE rtx is
4747 generated for TLS_CALL. */
4749 static GTY(()) rtx s390_tls_symbol;
4752 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4757 emit_insn (s390_load_got ());
4759 if (!s390_tls_symbol)
4760 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4762 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4763 gen_rtx_REG (Pmode, RETURN_REGNUM));
4765 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4766 RTL_CONST_CALL_P (insn) = 1;
4769 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4770 this (thread-local) address. REG may be used as temporary. */
4773 legitimize_tls_address (rtx addr, rtx reg)
4775 rtx new_rtx, tls_call, temp, base, r2;
4778 if (GET_CODE (addr) == SYMBOL_REF)
4779 switch (tls_symbolic_operand (addr))
4781 case TLS_MODEL_GLOBAL_DYNAMIC:
4783 r2 = gen_rtx_REG (Pmode, 2);
4784 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4785 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4786 new_rtx = force_const_mem (Pmode, new_rtx);
4787 emit_move_insn (r2, new_rtx);
4788 s390_emit_tls_call_insn (r2, tls_call);
4789 insn = get_insns ();
4792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4793 temp = gen_reg_rtx (Pmode);
4794 emit_libcall_block (insn, temp, r2, new_rtx);
4796 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4799 s390_load_address (reg, new_rtx);
4804 case TLS_MODEL_LOCAL_DYNAMIC:
4806 r2 = gen_rtx_REG (Pmode, 2);
4807 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4808 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4809 new_rtx = force_const_mem (Pmode, new_rtx);
4810 emit_move_insn (r2, new_rtx);
4811 s390_emit_tls_call_insn (r2, tls_call);
4812 insn = get_insns ();
4815 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4816 temp = gen_reg_rtx (Pmode);
4817 emit_libcall_block (insn, temp, r2, new_rtx);
4819 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4820 base = gen_reg_rtx (Pmode);
4821 s390_load_address (base, new_rtx);
4823 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4824 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4825 new_rtx = force_const_mem (Pmode, new_rtx);
4826 temp = gen_reg_rtx (Pmode);
4827 emit_move_insn (temp, new_rtx);
4829 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4832 s390_load_address (reg, new_rtx);
4837 case TLS_MODEL_INITIAL_EXEC:
4840 /* Assume GOT offset < 4k. This is handled the same way
4841 in both 31- and 64-bit code. */
4843 if (reload_in_progress || reload_completed)
4844 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4846 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4847 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4848 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4849 new_rtx = gen_const_mem (Pmode, new_rtx);
4850 temp = gen_reg_rtx (Pmode);
4851 emit_move_insn (temp, new_rtx);
4853 else if (TARGET_CPU_ZARCH)
4855 /* If the GOT offset might be >= 4k, we determine the position
4856 of the GOT entry via a PC-relative LARL. */
4858 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4859 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4860 temp = gen_reg_rtx (Pmode);
4861 emit_move_insn (temp, new_rtx);
4863 new_rtx = gen_const_mem (Pmode, temp);
4864 temp = gen_reg_rtx (Pmode);
4865 emit_move_insn (temp, new_rtx);
4869 /* If the GOT offset might be >= 4k, we have to load it
4870 from the literal pool. */
4872 if (reload_in_progress || reload_completed)
4873 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4875 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4876 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4877 new_rtx = force_const_mem (Pmode, new_rtx);
4878 temp = gen_reg_rtx (Pmode);
4879 emit_move_insn (temp, new_rtx);
4881 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4882 new_rtx = gen_const_mem (Pmode, new_rtx);
4884 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4885 temp = gen_reg_rtx (Pmode);
4886 emit_insn (gen_rtx_SET (temp, new_rtx));
4890 /* In position-dependent code, load the absolute address of
4891 the GOT entry from the literal pool. */
4893 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4894 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4895 new_rtx = force_const_mem (Pmode, new_rtx);
4896 temp = gen_reg_rtx (Pmode);
4897 emit_move_insn (temp, new_rtx);
4900 new_rtx = gen_const_mem (Pmode, new_rtx);
4901 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4902 temp = gen_reg_rtx (Pmode);
4903 emit_insn (gen_rtx_SET (temp, new_rtx));
4906 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4909 s390_load_address (reg, new_rtx);
4914 case TLS_MODEL_LOCAL_EXEC:
4915 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4916 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4917 new_rtx = force_const_mem (Pmode, new_rtx);
4918 temp = gen_reg_rtx (Pmode);
4919 emit_move_insn (temp, new_rtx);
4921 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4924 s390_load_address (reg, new_rtx);
4933 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4935 switch (XINT (XEXP (addr, 0), 1))
4937 case UNSPEC_INDNTPOFF:
4938 gcc_assert (TARGET_CPU_ZARCH);
4947 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4948 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4950 new_rtx = XEXP (XEXP (addr, 0), 0);
4951 if (GET_CODE (new_rtx) != SYMBOL_REF)
4952 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4954 new_rtx = legitimize_tls_address (new_rtx, reg);
4955 new_rtx = plus_constant (Pmode, new_rtx,
4956 INTVAL (XEXP (XEXP (addr, 0), 1)));
4957 new_rtx = force_operand (new_rtx, 0);
4961 gcc_unreachable (); /* for now ... */
4966 /* Emit insns making the address in operands[1] valid for a standard
4967 move to operands[0]. operands[1] is replaced by an address which
4968 should be used instead of the former RTX to emit the move pattern. */
4972 emit_symbolic_move (rtx *operands)
4974 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4976 if (GET_CODE (operands[0]) == MEM)
4977 operands[1] = force_reg (Pmode, operands[1]);
4978 else if (TLS_SYMBOLIC_CONST (operands[1]))
4979 operands[1] = legitimize_tls_address (operands[1], temp);
4981 operands[1] = legitimize_pic_address (operands[1], temp);
4984 /* Try machine-dependent ways of modifying an illegitimate address X
4985 to be legitimate. If we find one, return the new, valid address.
4987 OLDX is the address as it was before break_out_memory_refs was called.
4988 In some cases it is useful to look at this to decide what needs to be done.
4990 MODE is the mode of the operand pointed to by X.
4992 When -fpic is used, special handling is needed for symbolic references.
4993 See comments by legitimize_pic_address for details. */
4996 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4997 machine_mode mode ATTRIBUTE_UNUSED)
4999 rtx constant_term = const0_rtx;
5001 if (TLS_SYMBOLIC_CONST (x))
5003 x = legitimize_tls_address (x, 0);
5005 if (s390_legitimate_address_p (mode, x, FALSE))
5008 else if (GET_CODE (x) == PLUS
5009 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5010 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5016 if (SYMBOLIC_CONST (x)
5017 || (GET_CODE (x) == PLUS
5018 && (SYMBOLIC_CONST (XEXP (x, 0))
5019 || SYMBOLIC_CONST (XEXP (x, 1)))))
5020 x = legitimize_pic_address (x, 0);
5022 if (s390_legitimate_address_p (mode, x, FALSE))
5026 x = eliminate_constant_term (x, &constant_term);
5028 /* Optimize loading of large displacements by splitting them
5029 into the multiple of 4K and the rest; this allows the
5030 former to be CSE'd if possible.
5032 Don't do this if the displacement is added to a register
5033 pointing into the stack frame, as the offsets will
5034 change later anyway. */
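/* As a worked example of the split below: a displacement of 0x12345
   yields lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^ 0x345
   = 0x12000. The address becomes (base + 0x12000) + 0x345, and the
   load of 0x12000 can be CSE'd across accesses into the same 4K
   block. */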
5036 if (GET_CODE (constant_term) == CONST_INT
5037 && !TARGET_LONG_DISPLACEMENT
5038 && !DISP_IN_RANGE (INTVAL (constant_term))
5039 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5041 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5042 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5044 rtx temp = gen_reg_rtx (Pmode);
5045 rtx val = force_operand (GEN_INT (upper), temp);
5047 emit_move_insn (temp, val);
5049 x = gen_rtx_PLUS (Pmode, x, temp);
5050 constant_term = GEN_INT (lower);
5053 if (GET_CODE (x) == PLUS)
5055 if (GET_CODE (XEXP (x, 0)) == REG)
5057 rtx temp = gen_reg_rtx (Pmode);
5058 rtx val = force_operand (XEXP (x, 1), temp);
5060 emit_move_insn (temp, val);
5062 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5065 else if (GET_CODE (XEXP (x, 1)) == REG)
5067 rtx temp = gen_reg_rtx (Pmode);
5068 rtx val = force_operand (XEXP (x, 0), temp);
5070 emit_move_insn (temp, val);
5072 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5076 if (constant_term != const0_rtx)
5077 x = gen_rtx_PLUS (Pmode, x, constant_term);
5082 /* Try a machine-dependent way of reloading an illegitimate address AD
5083 operand. If we find one, push the reload and return the new address.
5085 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5086 and TYPE is the reload type of the current reload. */
5089 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5090 int opnum, int type)
5092 if (!optimize || TARGET_LONG_DISPLACEMENT)
5095 if (GET_CODE (ad) == PLUS)
5097 rtx tem = simplify_binary_operation (PLUS, Pmode,
5098 XEXP (ad, 0), XEXP (ad, 1));
5103 if (GET_CODE (ad) == PLUS
5104 && GET_CODE (XEXP (ad, 0)) == REG
5105 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5106 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5108 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5109 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5110 rtx cst, tem, new_rtx;
5112 cst = GEN_INT (upper);
5113 if (!legitimate_reload_constant_p (cst))
5114 cst = force_const_mem (Pmode, cst);
5116 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5117 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5119 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5120 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5121 opnum, (enum reload_type) type);
5128 /* Emit code to move LEN bytes from SRC to DST. */
5131 s390_expand_movmem (rtx dst, rtx src, rtx len)
5133 /* When tuning for z10 or higher we rely on the Glibc functions to
5134 do the right thing. Only for constant lengths below 64k do we
5135 generate inline code. */
5136 if (s390_tune >= PROCESSOR_2097_Z10
5137 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5140 /* Expand memcpy for constant length operands without a loop if it
5141 is shorter that way.
5143 With a constant length argument a
5144 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
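/* For instance, a constant memcpy of 700 bytes is emitted below as
   three mvc instructions covering 256 + 256 + 188 bytes (with length
   operands 255, 255 and 187, since mvc encodes length minus one). */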
5145 if (GET_CODE (len) == CONST_INT
5146 && INTVAL (len) >= 0
5147 && INTVAL (len) <= 256 * 6
5148 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5152 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5154 rtx newdst = adjust_address (dst, BLKmode, o);
5155 rtx newsrc = adjust_address (src, BLKmode, o);
5156 emit_insn (gen_movmem_short (newdst, newsrc,
5157 GEN_INT (l > 256 ? 255 : l - 1)));
5161 else if (TARGET_MVCLE)
5163 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5168 rtx dst_addr, src_addr, count, blocks, temp;
5169 rtx_code_label *loop_start_label = gen_label_rtx ();
5170 rtx_code_label *loop_end_label = gen_label_rtx ();
5171 rtx_code_label *end_label = gen_label_rtx ();
5174 mode = GET_MODE (len);
5175 if (mode == VOIDmode)
5178 dst_addr = gen_reg_rtx (Pmode);
5179 src_addr = gen_reg_rtx (Pmode);
5180 count = gen_reg_rtx (mode);
5181 blocks = gen_reg_rtx (mode);
5183 convert_move (count, len, 1);
5184 emit_cmp_and_jump_insns (count, const0_rtx,
5185 EQ, NULL_RTX, mode, 1, end_label);
5187 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5188 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5189 dst = change_address (dst, VOIDmode, dst_addr);
5190 src = change_address (src, VOIDmode, src_addr);
5192 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5195 emit_move_insn (count, temp);
5197 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5200 emit_move_insn (blocks, temp);
5202 emit_cmp_and_jump_insns (blocks, const0_rtx,
5203 EQ, NULL_RTX, mode, 1, loop_end_label);
5205 emit_label (loop_start_label);
5208 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5212 /* Issue a read prefetch for the +3 cache line. */
5213 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5214 const0_rtx, const0_rtx);
5215 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5216 emit_insn (prefetch);
5218 /* Issue a write prefetch for the +3 cache line. */
5219 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5220 const1_rtx, const0_rtx);
5221 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5222 emit_insn (prefetch);
5225 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5226 s390_load_address (dst_addr,
5227 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5228 s390_load_address (src_addr,
5229 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5231 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5234 emit_move_insn (blocks, temp);
5236 emit_cmp_and_jump_insns (blocks, const0_rtx,
5237 EQ, NULL_RTX, mode, 1, loop_end_label);
5239 emit_jump (loop_start_label);
5240 emit_label (loop_end_label);
5242 emit_insn (gen_movmem_short (dst, src,
5243 convert_to_mode (Pmode, count, 1)));
5244 emit_label (end_label);
5249 /* Emit code to set LEN bytes at DST to VAL.
5250 Make use of clrmem if VAL is zero. */
5253 s390_expand_setmem (rtx dst, rtx len, rtx val)
5255 const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5257 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5260 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5262 /* Expand setmem/clrmem for a constant length operand without a
5263 loop if it will be shorter that way.
5264 With a constant length and without pfd argument a
5265 clrmem loop is 32 bytes -> 5.3 * xc
5266 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5267 if (GET_CODE (len) == CONST_INT
5268 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5269 || INTVAL (len) <= 257 * 3)
5270 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5274 if (val == const0_rtx)
5275 /* clrmem: emit 256 byte blockwise XCs. */
5276 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5278 rtx newdst = adjust_address (dst, BLKmode, o);
5279 emit_insn (gen_clrmem_short (newdst,
5280 GEN_INT (l > 256 ? 255 : l - 1)));
5283 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5284 setting first byte to val and using a 256 byte mvc with one
5285 byte overlap to propagate the byte. */
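/* The propagation works because mvc copies strictly left to right,
   one byte at a time, so the overlapping copy behaves like this
   sketch:

     dst[0] = val;               -- the mvi
     for (i = 0; i < n; i++)     -- mvc dst+1(n),dst
       dst[1 + i] = dst[i];

   Each copied byte reads the value stored just before it, fanning
   VAL across the whole block. */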
5286 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5288 rtx newdst = adjust_address (dst, BLKmode, o);
5289 emit_move_insn (adjust_address (dst, QImode, o), val);
5292 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5293 emit_insn (gen_movmem_short (newdstp1, newdst,
5294 GEN_INT (l > 257 ? 255 : l - 2)));
5299 else if (TARGET_MVCLE)
5301 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5303 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5306 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5312 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5313 rtx_code_label *loop_start_label = gen_label_rtx ();
5314 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5315 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5316 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5319 mode = GET_MODE (len);
5320 if (mode == VOIDmode)
5323 dst_addr = gen_reg_rtx (Pmode);
5324 count = gen_reg_rtx (mode);
5325 blocks = gen_reg_rtx (mode);
5327 convert_move (count, len, 1);
5328 emit_cmp_and_jump_insns (count, const0_rtx,
5329 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5332 /* We need to make a copy of the target address since memset is
5333 supposed to return it unmodified. We have to make it here
5334 already since the new reg is used at onebyte_end_label. */
5335 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5336 dst = change_address (dst, VOIDmode, dst_addr);
5338 if (val != const0_rtx)
5340 /* When using the overlapping mvc the original target
5341 address is only accessed as a single byte entity (even by
5342 the mvc reading this value). */
5343 set_mem_size (dst, 1);
5344 dstp1 = adjust_address (dst, VOIDmode, 1);
5345 emit_cmp_and_jump_insns (count,
5346 const1_rtx, EQ, NULL_RTX, mode, 1,
5347 onebyte_end_label, very_unlikely);
5350 /* There is one unconditional (mvi+mvc)/xc after the loop
5351 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5352 or one (xc) here leaves this number of bytes to be handled by the loop. */
5354 temp = expand_binop (mode, add_optab, count,
5355 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5356 count, 1, OPTAB_DIRECT);
5358 emit_move_insn (count, temp);
5360 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5363 emit_move_insn (blocks, temp);
5365 emit_cmp_and_jump_insns (blocks, const0_rtx,
5366 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5368 emit_jump (loop_start_label);
5370 if (val != const0_rtx)
5372 /* The 1 byte != 0 special case. Not handled efficiently
5373 since we require two jumps for that. However, this
5374 should be very rare. */
5375 emit_label (onebyte_end_label);
5376 emit_move_insn (adjust_address (dst, QImode, 0), val);
5377 emit_jump (zerobyte_end_label);
5380 emit_label (loop_start_label);
5383 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5385 /* Issue a write prefetch for the +4 cache line. */
5386 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5388 const1_rtx, const0_rtx);
5389 emit_insn (prefetch);
5390 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5393 if (val == const0_rtx)
5394 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5397 /* Set the first byte in the block to the value and use an
5398 overlapping mvc for the block. */
5399 emit_move_insn (adjust_address (dst, QImode, 0), val);
5400 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5402 s390_load_address (dst_addr,
5403 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5405 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5408 emit_move_insn (blocks, temp);
5410 emit_cmp_and_jump_insns (blocks, const0_rtx,
5411 NE, NULL_RTX, mode, 1, loop_start_label);
5413 emit_label (restbyte_end_label);
5415 if (val == const0_rtx)
5416 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5419 /* Set the first byte in the block to the value and use an
5420 overlapping mvc for the block. */
5421 emit_move_insn (adjust_address (dst, QImode, 0), val);
5422 /* execute only uses the lowest 8 bits of count; that's
5423 exactly what we need here. */
5424 emit_insn (gen_movmem_short (dstp1, dst,
5425 convert_to_mode (Pmode, count, 1)));
5428 emit_label (zerobyte_end_label);
5432 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5433 and return the result in TARGET. */
5436 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5438 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5441 /* When tuning for z10 or higher we rely on the Glibc functions to
5442 do the right thing. Only for constant lengths below 64k do we
5443 generate inline code. */
5444 if (s390_tune >= PROCESSOR_2097_Z10
5445 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5448 /* As the result of CMPINT is inverted compared to what we need,
5449 we have to swap the operands. */
5450 tmp = op0; op0 = op1; op1 = tmp;
5452 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5454 if (INTVAL (len) > 0)
5456 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5457 emit_insn (gen_cmpint (target, ccreg));
5460 emit_move_insn (target, const0_rtx);
5462 else if (TARGET_MVCLE)
5464 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5465 emit_insn (gen_cmpint (target, ccreg));
5469 rtx addr0, addr1, count, blocks, temp;
5470 rtx_code_label *loop_start_label = gen_label_rtx ();
5471 rtx_code_label *loop_end_label = gen_label_rtx ();
5472 rtx_code_label *end_label = gen_label_rtx ();
5475 mode = GET_MODE (len);
5476 if (mode == VOIDmode)
5479 addr0 = gen_reg_rtx (Pmode);
5480 addr1 = gen_reg_rtx (Pmode);
5481 count = gen_reg_rtx (mode);
5482 blocks = gen_reg_rtx (mode);
5484 convert_move (count, len, 1);
5485 emit_cmp_and_jump_insns (count, const0_rtx,
5486 EQ, NULL_RTX, mode, 1, end_label);
5488 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5489 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5490 op0 = change_address (op0, VOIDmode, addr0);
5491 op1 = change_address (op1, VOIDmode, addr1);
5493 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5496 emit_move_insn (count, temp);
5498 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5501 emit_move_insn (blocks, temp);
5503 emit_cmp_and_jump_insns (blocks, const0_rtx,
5504 EQ, NULL_RTX, mode, 1, loop_end_label);
5506 emit_label (loop_start_label);
5509 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5513 /* Issue a read prefetch for the +2 cache line of operand 1. */
5514 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5515 const0_rtx, const0_rtx);
5516 emit_insn (prefetch);
5517 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5519 /* Issue a read prefetch for the +2 cache line of operand 2. */
5520 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5521 const0_rtx, const0_rtx);
5522 emit_insn (prefetch);
5523 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5526 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5527 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5528 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5529 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5530 temp = gen_rtx_SET (pc_rtx, temp);
5531 emit_jump_insn (temp);
5533 s390_load_address (addr0,
5534 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5535 s390_load_address (addr1,
5536 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5538 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5541 emit_move_insn (blocks, temp);
5543 emit_cmp_and_jump_insns (blocks, const0_rtx,
5544 EQ, NULL_RTX, mode, 1, loop_end_label);
5546 emit_jump (loop_start_label);
5547 emit_label (loop_end_label);
5549 emit_insn (gen_cmpmem_short (op0, op1,
5550 convert_to_mode (Pmode, count, 1)));
5551 emit_label (end_label);
5553 emit_insn (gen_cmpint (target, ccreg));
5558 /* Emit a conditional jump to LABEL for condition code mask MASK using
5559 comparison operator COMPARISON. Return the emitted jump insn. */
5562 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5566 gcc_assert (comparison == EQ || comparison == NE);
5567 gcc_assert (mask > 0 && mask < 15);
5569 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5570 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5571 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5572 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5573 temp = gen_rtx_SET (pc_rtx, temp);
5574 return emit_jump_insn (temp);
5577 /* Emit the instructions to implement strlen of STRING and store the
5578 result in TARGET. The string has the known ALIGNMENT. This
5579 version uses vector instructions and is therefore not appropriate
5580 for targets prior to z13. */
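/* Roughly, the emitted code corresponds to this sketch:

     idx = 0;
     if (addr & 15)
       load the bytes up to the next 16-byte boundary with vll
       (zero-padded) and enter the loop after the aligned load;
     loop:
       load 16 aligned bytes; idx += 16;
       vfene yields the index of the first zero byte, or 16 if none;
       if none, repeat;
     return idx - 16 + index of the zero byte (corrected if the
     initial partial load already covered the zero). */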
5583 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5585 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5586 int very_likely = REG_BR_PROB_BASE - 1;
5587 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5588 rtx str_reg = gen_reg_rtx (V16QImode);
5589 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5590 rtx str_idx_reg = gen_reg_rtx (Pmode);
5591 rtx result_reg = gen_reg_rtx (V16QImode);
5592 rtx is_aligned_label = gen_label_rtx ();
5593 rtx into_loop_label = NULL_RTX;
5594 rtx loop_start_label = gen_label_rtx ();
5596 rtx len = gen_reg_rtx (QImode);
5599 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5600 emit_move_insn (str_idx_reg, const0_rtx);
5602 if (INTVAL (alignment) < 16)
5604 /* Check whether the address happens to be aligned properly and, if
5605 so, jump directly to the aligned loop. */
5606 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5607 str_addr_base_reg, GEN_INT (15)),
5608 const0_rtx, EQ, NULL_RTX,
5609 Pmode, 1, is_aligned_label);
5611 temp = gen_reg_rtx (Pmode);
5612 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5613 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5614 gcc_assert (REG_P (temp));
5615 highest_index_to_load_reg =
5616 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5617 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5618 gcc_assert (REG_P (highest_index_to_load_reg));
5619 emit_insn (gen_vllv16qi (str_reg,
5620 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5621 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5623 into_loop_label = gen_label_rtx ();
5624 s390_emit_jump (into_loop_label, NULL_RTX);
5628 emit_label (is_aligned_label);
5629 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5631 /* Reaching this point we are only performing 16 byte aligned loads. */
5633 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5635 emit_label (loop_start_label);
5636 LABEL_NUSES (loop_start_label) = 1;
5638 /* Load 16 bytes of the string into VR. */
5639 emit_move_insn (str_reg,
5640 gen_rtx_MEM (V16QImode,
5641 gen_rtx_PLUS (Pmode, str_idx_reg,
5642 str_addr_base_reg)));
5643 if (into_loop_label != NULL_RTX)
5645 emit_label (into_loop_label);
5646 LABEL_NUSES (into_loop_label) = 1;
5649 /* Increment string index by 16 bytes. */
5650 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5651 str_idx_reg, 1, OPTAB_DIRECT);
5653 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5654 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5656 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5657 REG_BR_PROB, very_likely);
5658 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5660 /* If the string pointer wasn't aligned we have loaded less than 16
5661 bytes and the remaining bytes got filled with zeros (by vll).
5662 Now we have to check whether the resulting index lies within the
5663 bytes actually part of the string. */
5665 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5666 highest_index_to_load_reg);
5667 s390_load_address (highest_index_to_load_reg,
5668 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5671 emit_insn (gen_movdicc (str_idx_reg, cond,
5672 highest_index_to_load_reg, str_idx_reg));
5674 emit_insn (gen_movsicc (str_idx_reg, cond,
5675 highest_index_to_load_reg, str_idx_reg));
5677 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5680 expand_binop (Pmode, add_optab, str_idx_reg,
5681 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5682 /* FIXME: len is already zero extended - so avoid the llgcr emitted below. */
5684 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5685 convert_to_mode (Pmode, len, 1),
5686 target, 1, OPTAB_DIRECT);
5688 emit_move_insn (target, temp);
5692 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5694 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5695 rtx temp = gen_reg_rtx (Pmode);
5696 rtx src_addr = XEXP (src, 0);
5697 rtx dst_addr = XEXP (dst, 0);
5698 rtx src_addr_reg = gen_reg_rtx (Pmode);
5699 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5700 rtx offset = gen_reg_rtx (Pmode);
5701 rtx vsrc = gen_reg_rtx (V16QImode);
5702 rtx vpos = gen_reg_rtx (V16QImode);
5703 rtx loadlen = gen_reg_rtx (SImode);
5704 rtx gpos_qi = gen_reg_rtx (QImode);
5705 rtx gpos = gen_reg_rtx (SImode);
5706 rtx done_label = gen_label_rtx ();
5707 rtx loop_label = gen_label_rtx ();
5708 rtx exit_label = gen_label_rtx ();
5709 rtx full_label = gen_label_rtx ();
5711 /* Perform a quick check for a string ending within the first up-to-16
5712 bytes and exit early if successful. */
5714 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5715 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5716 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5717 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5718 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5719 /* gpos is the byte index if a zero was found and 16 otherwise.
5720 So if it is lower than the loaded bytes we have a hit. */
5721 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5723 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5725 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5727 emit_jump (exit_label);
5730 emit_label (full_label);
5731 LABEL_NUSES (full_label) = 1;
5733 /* Calculate `offset' so that src + offset points to the last byte
5734 before 16 byte alignment. */
5736 /* temp = src_addr & 0xf */
5737 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5740 /* offset = 0xf - temp */
5741 emit_move_insn (offset, GEN_INT (15));
5742 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5745 /* Store `offset' bytes in the destination string. The quick check
5746 has loaded at least `offset' bytes into vsrc. */
5748 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5750 /* Advance to the next byte to be loaded. */
5751 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5754 /* Make sure the addresses are single regs which can be used as a base. */
5756 emit_move_insn (src_addr_reg, src_addr);
5757 emit_move_insn (dst_addr_reg, dst_addr);
5761 emit_label (loop_label);
5762 LABEL_NUSES (loop_label) = 1;
5764 emit_move_insn (vsrc,
5765 gen_rtx_MEM (V16QImode,
5766 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5768 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5769 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5770 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5771 REG_BR_PROB, very_unlikely);
5773 emit_move_insn (gen_rtx_MEM (V16QImode,
5774 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5777 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5778 offset, 1, OPTAB_DIRECT);
5780 emit_jump (loop_label);
5785 /* We are done. Add the offset of the zero character to the dst_addr
5786 pointer to get the result. */
5788 emit_label (done_label);
5789 LABEL_NUSES (done_label) = 1;
5791 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5794 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5795 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5797 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5799 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5804 emit_label (exit_label);
5805 LABEL_NUSES (exit_label) = 1;
5809 /* Expand conditional increment or decrement using alc/slb instructions.
5810 Should generate code setting DST to either SRC or SRC + INCREMENT,
5811 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5812 Returns true if successful, false otherwise.
5814 That makes it possible to implement some if-constructs without jumps e.g.:
5815 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5816 unsigned int a, b, c;
5817 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5818 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5819 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5820 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5822 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5823 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5824 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5825 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5826 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5829 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5830 rtx dst, rtx src, rtx increment)
5832 machine_mode cmp_mode;
5833 machine_mode cc_mode;
5839 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5840 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5842 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5843 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5848 /* Try ADD LOGICAL WITH CARRY. */
5849 if (increment == const1_rtx)
5851 /* Determine CC mode to use. */
5852 if (cmp_code == EQ || cmp_code == NE)
5854 if (cmp_op1 != const0_rtx)
5856 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5857 NULL_RTX, 0, OPTAB_WIDEN);
5858 cmp_op1 = const0_rtx;
5861 cmp_code = cmp_code == EQ ? LEU : GTU;
5864 if (cmp_code == LTU || cmp_code == LEU)
5869 cmp_code = swap_condition (cmp_code);
5886 /* Emit comparison instruction pattern. */
5887 if (!register_operand (cmp_op0, cmp_mode))
5888 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5890 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5891 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5892 /* We use insn_invalid_p here to add clobbers if required. */
5893 ret = insn_invalid_p (emit_insn (insn), false);
5896 /* Emit ALC instruction pattern. */
5897 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5898 gen_rtx_REG (cc_mode, CC_REGNUM),
5901 if (src != const0_rtx)
5903 if (!register_operand (src, GET_MODE (dst)))
5904 src = force_reg (GET_MODE (dst), src);
5906 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5907 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5910 p = rtvec_alloc (2);
5912 gen_rtx_SET (dst, op_res);
5914 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5915 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5920 /* Try SUBTRACT LOGICAL WITH BORROW. */
5921 if (increment == constm1_rtx)
5923 /* Determine CC mode to use. */
5924 if (cmp_code == EQ || cmp_code == NE)
5926 if (cmp_op1 != const0_rtx)
5928 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5929 NULL_RTX, 0, OPTAB_WIDEN);
5930 cmp_op1 = const0_rtx;
5933 cmp_code = cmp_code == EQ ? LEU : GTU;
5936 if (cmp_code == GTU || cmp_code == GEU)
5941 cmp_code = swap_condition (cmp_code);
5958 /* Emit comparison instruction pattern. */
5959 if (!register_operand (cmp_op0, cmp_mode))
5960 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5962 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5963 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5964 /* We use insn_invalid_p here to add clobbers if required. */
5965 ret = insn_invalid_p (emit_insn (insn), false);
5968 /* Emit SLB instruction pattern. */
5969 if (!register_operand (src, GET_MODE (dst)))
5970 src = force_reg (GET_MODE (dst), src);
5972 op_res = gen_rtx_MINUS (GET_MODE (dst),
5973 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5974 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5975 gen_rtx_REG (cc_mode, CC_REGNUM),
5977 p = rtvec_alloc (2);
5979 gen_rtx_SET (dst, op_res);
5981 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5982 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5990 /* Expand code for the insv template. Return true if successful. */
5993 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5995 int bitsize = INTVAL (op1);
5996 int bitpos = INTVAL (op2);
5997 machine_mode mode = GET_MODE (dest);
5999 int smode_bsize, mode_bsize;
6002 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6005 /* Generate INSERT IMMEDIATE (IILL et al). */
6006 /* (set (ze (reg)) (const_int)). */
6008 && register_operand (dest, word_mode)
6009 && (bitpos % 16) == 0
6010 && (bitsize % 16) == 0
6011 && const_int_operand (src, VOIDmode))
6013 HOST_WIDE_INT val = INTVAL (src);
6014 int regpos = bitpos + bitsize;
6016 while (regpos > bitpos)
6018 machine_mode putmode;
6021 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6026 putsize = GET_MODE_BITSIZE (putmode);
6028 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6031 gen_int_mode (val, putmode));
6034 gcc_assert (regpos == bitpos);
6038 smode = smallest_mode_for_size (bitsize, MODE_INT);
6039 smode_bsize = GET_MODE_BITSIZE (smode);
6040 mode_bsize = GET_MODE_BITSIZE (mode);
6042 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6044 && (bitsize % BITS_PER_UNIT) == 0
6046 && (register_operand (src, word_mode)
6047 || const_int_operand (src, VOIDmode)))
6049 /* Emit standard pattern if possible. */
6050 if (smode_bsize == bitsize)
6052 emit_move_insn (adjust_address (dest, smode, 0),
6053 gen_lowpart (smode, src));
6057 /* (set (ze (mem)) (const_int)). */
6058 else if (const_int_operand (src, VOIDmode))
6060 int size = bitsize / BITS_PER_UNIT;
6061 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6063 UNITS_PER_WORD - size);
6065 dest = adjust_address (dest, BLKmode, 0);
6066 set_mem_size (dest, size);
6067 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6071 /* (set (ze (mem)) (reg)). */
6072 else if (register_operand (src, word_mode))
6075 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6079 /* Emit st,stcmh sequence. */
6080 int stcmh_width = bitsize - 32;
6081 int size = stcmh_width / BITS_PER_UNIT;
6083 emit_move_insn (adjust_address (dest, SImode, size),
6084 gen_lowpart (SImode, src));
6085 set_mem_size (dest, size);
6086 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6087 GEN_INT (stcmh_width),
6089 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6095 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6096 if ((bitpos % BITS_PER_UNIT) == 0
6097 && (bitsize % BITS_PER_UNIT) == 0
6098 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6100 && (mode == DImode || mode == SImode)
6101 && register_operand (dest, mode))
6103 /* Emit a strict_low_part pattern if possible. */
6104 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6106 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6107 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6108 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6109 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6113 /* ??? There are more powerful versions of ICM that are not
6114 completely represented in the md file. */
6117 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6118 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6120 machine_mode mode_s = GET_MODE (src);
6122 if (CONSTANT_P (src))
6124 /* For constant zero values the representation with AND
6125 appears to be folded in more situations than the (set
6126 (zero_extract) ...).
6127 We only do this when the start and end of the bitfield
6128 remain in the same SImode chunk. That way nihf or nilf can be used.
6130 The AND patterns might still generate a risbg for this. */
6131 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6134 src = force_reg (mode, src);
6136 else if (mode_s != mode)
6138 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6139 src = force_reg (mode_s, src);
6140 src = gen_lowpart (mode, src);
6143 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6144 op = gen_rtx_SET (op, src);
6148 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6149 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6159 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6160 register that holds VAL of mode MODE shifted by COUNT bits. */
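/* For example, VAL = 0x1234 in HImode with COUNT = 8 yields
   (0x1234 & 0xffff) << 8 = 0x123400 in an SImode register. */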
6163 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6165 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6166 NULL_RTX, 1, OPTAB_DIRECT);
6167 return expand_simple_binop (SImode, ASHIFT, val, count,
6168 NULL_RTX, 1, OPTAB_DIRECT);
6171 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6172 the result in TARGET. */
6175 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6176 rtx cmp_op1, rtx cmp_op2)
6178 machine_mode mode = GET_MODE (target);
6179 bool neg_p = false, swap_p = false;
6182 if (GET_MODE (cmp_op1) == V2DFmode)
6186 /* NE a != b -> !(a == b) */
6187 case NE: cond = EQ; neg_p = true; break;
6188 /* UNGT a u> b -> !(b >= a) */
6189 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6190 /* UNGE a u>= b -> !(b > a) */
6191 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6192 /* LE: a <= b -> b >= a */
6193 case LE: cond = GE; swap_p = true; break;
6194 /* UNLE: a u<= b -> !(a > b) */
6195 case UNLE: cond = GT; neg_p = true; break;
6196 /* LT: a < b -> b > a */
6197 case LT: cond = GT; swap_p = true; break;
6198 /* UNLT: a u< b -> !(a >= b) */
6199 case UNLT: cond = GE; neg_p = true; break;
6201 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6204 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6207 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6210 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6219 /* NE: a != b -> !(a == b) */
6220 case NE: cond = EQ; neg_p = true; break;
6221 /* GE: a >= b -> !(b > a) */
6222 case GE: cond = GT; neg_p = true; swap_p = true; break;
6223 /* GEU: a >= b -> !(b > a) */
6224 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6225 /* LE: a <= b -> !(a > b) */
6226 case LE: cond = GT; neg_p = true; break;
6227 /* LEU: a <= b -> !(a > b) */
6228 case LEU: cond = GTU; neg_p = true; break;
6229 /* LT: a < b -> b > a */
6230 case LT: cond = GT; swap_p = true; break;
6231 /* LTU: a < b -> b > a */
6232 case LTU: cond = GTU; swap_p = true; break;
6239 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6242 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6244 cmp_op1, cmp_op2)));
6246 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6249 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6250 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6251 elements in CMP1 and CMP2 fulfill the comparison.
6252 This function is only used to emit patterns for the vx builtins and
6253 therefore only handles comparison codes required by the builtins. */
6256 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6257 rtx cmp1, rtx cmp2, bool all_p)
6259 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6260 rtx tmp_reg = gen_reg_rtx (SImode);
6261 bool swap_p = false;
6263 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6269 cc_producer_mode = CCVEQmode;
6273 code = swap_condition (code);
6278 cc_producer_mode = CCVIHmode;
6282 code = swap_condition (code);
6287 cc_producer_mode = CCVIHUmode;
6293 scratch_mode = GET_MODE (cmp1);
6294 /* These codes represent inverted CC interpretations. Inverting
6295 an ALL CC mode results in an ANY CC mode and the other way
6296 around. Invert the all_p flag here to compensate for that. */
6298 if (code == NE || code == LE || code == LEU)
6301 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6303 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6309 case EQ: cc_producer_mode = CCVEQmode; break;
6310 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6311 case GT: cc_producer_mode = CCVFHmode; break;
6312 case GE: cc_producer_mode = CCVFHEmode; break;
6313 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6314 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6315 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6316 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6317 default: gcc_unreachable ();
6319 scratch_mode = mode_for_vector (
6320 int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))),
6321 GET_MODE_NUNITS (GET_MODE (cmp1)));
6322 gcc_assert (scratch_mode != BLKmode);
6327 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6339 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6340 gen_rtvec (2, gen_rtx_SET (
6341 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6342 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6343 gen_rtx_CLOBBER (VOIDmode,
6344 gen_rtx_SCRATCH (scratch_mode)))));
6345 emit_move_insn (target, const0_rtx);
6346 emit_move_insn (tmp_reg, const1_rtx);
6348 emit_move_insn (target,
6349 gen_rtx_IF_THEN_ELSE (SImode,
6350 gen_rtx_fmt_ee (code, VOIDmode,
6351 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6356 /* Invert the comparison CODE applied to a CC mode. This is only safe
6357 if we know whether the result was created by a floating point
6358 compare or not. For the CCV modes this is encoded as part of the mode. */
6361 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6363 /* Reversal of FP compares needs care -- an ordered compare
6364 becomes an unordered compare and vice versa. */
6365 if (mode == CCVFALLmode || mode == CCVFANYmode)
6366 return reverse_condition_maybe_unordered (code);
6367 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6368 return reverse_condition (code);
6373 /* Generate a vector comparison expression loading either elements of
6374 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 and CMP_OP2. */
6378 s390_expand_vcond (rtx target, rtx then, rtx els,
6379 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6382 machine_mode result_mode;
6385 machine_mode target_mode = GET_MODE (target);
6386 machine_mode cmp_mode = GET_MODE (cmp_op1);
6387 rtx op = (cond == LT) ? els : then;
6389 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6390 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6391 for short and byte (x >> 15 and x >> 7 respectively). */
6392 if ((cond == LT || cond == GE)
6393 && target_mode == cmp_mode
6394 && cmp_op2 == CONST0_RTX (cmp_mode)
6395 && op == CONST0_RTX (target_mode)
6396 && s390_vector_mode_supported_p (target_mode)
6397 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6399 rtx negop = (cond == LT) ? then : els;
6401 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6403 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6404 if (negop == CONST1_RTX (target_mode))
6406 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6407 GEN_INT (shift), target,
6410 emit_move_insn (target, res);
6414 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6415 else if (all_ones_operand (negop, target_mode))
6417 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6418 GEN_INT (shift), target,
6421 emit_move_insn (target, res);
6426 /* We always use an integral type vector to hold the comparison result. */
6428 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6429 result_target = gen_reg_rtx (result_mode);
6431 /* We allow vector immediates as comparison operands that
6432 can be handled by the optimization above but not by the
6433 following code. Hence, force them into registers here. */
6434 if (!REG_P (cmp_op1))
6435 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6437 if (!REG_P (cmp_op2))
6438 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6440 s390_expand_vec_compare (result_target, cond,
6443 /* If the results are supposed to be either -1 or 0 we are done
6444 since this is what our compare instructions generate anyway. */
6445 if (all_ones_operand (then, GET_MODE (then))
6446 && const0_operand (els, GET_MODE (els)))
6448 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6453 /* Otherwise we will do a vsel afterwards. */
6454 /* This gets triggered e.g.
6455 with gcc.c-torture/compile/pr53410-1.c */
6457 then = force_reg (target_mode, then);
6460 els = force_reg (target_mode, els);
6462 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6464 CONST0_RTX (result_mode));
6466 /* We compared the result against zero above so we have to swap then and els here. */
6468 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6470 gcc_assert (target_mode == GET_MODE (then));
6471 emit_insn (gen_rtx_SET (target, tmp));
6474 /* Emit the RTX necessary to initialize the vector TARGET with values in VALS. */
6477 s390_expand_vec_init (rtx target, rtx vals)
6479 machine_mode mode = GET_MODE (target);
6480 machine_mode inner_mode = GET_MODE_INNER (mode);
6481 int n_elts = GET_MODE_NUNITS (mode);
6482 bool all_same = true, all_regs = true, all_const_int = true;
6486 for (i = 0; i < n_elts; ++i)
6488 x = XVECEXP (vals, 0, i);
6490 if (!CONST_INT_P (x))
6491 all_const_int = false;
6493 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6500 /* Use vector gen mask or vector gen byte mask if possible. */
6501 if (all_same && all_const_int
6502 && (XVECEXP (vals, 0, 0) == const0_rtx
6503 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6505 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6507 emit_insn (gen_rtx_SET (target,
6508 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6514 emit_insn (gen_rtx_SET (target,
6515 gen_rtx_VEC_DUPLICATE (mode,
6516 XVECEXP (vals, 0, 0))));
6523 && GET_MODE_SIZE (inner_mode) == 8)
6525 /* Use vector load pair. */
6526 emit_insn (gen_rtx_SET (target,
6527 gen_rtx_VEC_CONCAT (mode,
6528 XVECEXP (vals, 0, 0),
6529 XVECEXP (vals, 0, 1))));
6533 /* We are about to set the vector elements one by one. Zero out the
6534 full register first in order to help the data flow framework to
6535 detect it as a full VR set. */
6536 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6538 /* Unfortunately the vec_init expander is not allowed to fail. So
6539 we have to implement the fallback ourselves. */
6540 for (i = 0; i < n_elts; i++)
6542 rtx elem = XVECEXP (vals, 0, i);
6543 if (!general_operand (elem, GET_MODE (elem)))
6544 elem = force_reg (inner_mode, elem);
6546 emit_insn (gen_rtx_SET (target,
6547 gen_rtx_UNSPEC (mode,
6549 GEN_INT (i), target),
6554 /* Structure to hold the initial parameters for a compare_and_swap operation
6555 in HImode and QImode. */
6557 struct alignment_context
6559 rtx memsi; /* SI aligned memory location. */
6560 rtx shift; /* Bit offset with regard to lsb. */
6561 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6562 rtx modemaski; /* ~modemask */
6563 bool aligned; /* True if memory is aligned, false otherwise. */
6566 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6567 structure AC for transparent simplification, if the memory alignment is known
6568 to be at least 32 bit. MEM is the memory location for the actual operation
6569 and MODE its mode. */
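/* A worked example of the fields computed below: for a HImode access
   at an address with ADDR & 3 == 1, memsi covers ADDR & -4, the
   initial shift of 4 - 2 = 2 bytes is reduced by the byte offset of 1
   to 1 byte = 8 bits, and modemask becomes 0xffff << 8; i.e. the
   halfword occupies bits 8..23 of the big-endian word. */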
6572 init_alignment_context (struct alignment_context *ac, rtx mem,
6575 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6576 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6579 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6582 /* Alignment is unknown. */
6583 rtx byteoffset, addr, align;
6585 /* Force the address into a register. */
6586 addr = force_reg (Pmode, XEXP (mem, 0));
6588 /* Align it to SImode. */
6589 align = expand_simple_binop (Pmode, AND, addr,
6590 GEN_INT (-GET_MODE_SIZE (SImode)),
6591 NULL_RTX, 1, OPTAB_DIRECT);
6593 ac->memsi = gen_rtx_MEM (SImode, align);
6594 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6595 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6596 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6598 /* Calculate shiftcount. */
6599 byteoffset = expand_simple_binop (Pmode, AND, addr,
6600 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6601 NULL_RTX, 1, OPTAB_DIRECT);
6602 /* As we already have some offset, evaluate the remaining distance. */
6603 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6604 NULL_RTX, 1, OPTAB_DIRECT);
6607 /* Shift is the byte count, but we need the bitcount. */
6608 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6609 NULL_RTX, 1, OPTAB_DIRECT);
6611 /* Calculate masks. */
6612 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6613 GEN_INT (GET_MODE_MASK (mode)),
6614 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6615 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6619 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6620 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6621 perform the merge in SEQ2. */
6624 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6625 machine_mode mode, rtx val, rtx ins)
6632 tmp = copy_to_mode_reg (SImode, val);
6633 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6637 *seq2 = get_insns ();
6644 /* Failed to use insv. Generate a two part shift and mask. */
6646 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6647 *seq1 = get_insns ();
6651 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6652 *seq2 = get_insns ();
6658 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6659 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6660 value to set if CMP == MEM. */
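/* A user-level example (an assumed illustration, not taken from this
   file) that ends up here:

     _Bool cas (short *p, short *expected, short desired)
     {
       return __atomic_compare_exchange_n (p, expected, desired, 0,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }

   Since CS operates on words, the containing 4-byte word is loaded
   and CMP/NEW_RTX are inserted into it around the CS loop below. */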
6663 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6664 rtx cmp, rtx new_rtx, bool is_weak)
6666 struct alignment_context ac;
6667 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6668 rtx res = gen_reg_rtx (SImode);
6669 rtx_code_label *csloop = NULL, *csend = NULL;
6671 gcc_assert (MEM_P (mem));
6673 init_alignment_context (&ac, mem, mode);
6675 /* Load full word. Subsequent loads are performed by CS. */
6676 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6677 NULL_RTX, 1, OPTAB_DIRECT);
6679 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6680 possible, we try to use insv to make this happen efficiently. If
6681 that fails we'll generate code both inside and outside the loop. */
6682 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6683 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6690 /* Start CS loop. */
6693 /* Begin assuming success. */
6694 emit_move_insn (btarget, const1_rtx);
6696 csloop = gen_label_rtx ();
6697 csend = gen_label_rtx ();
6698 emit_label (csloop);
6701 /* val = "<mem>00..0<mem>"
6702 * cmp = "00..0<cmp>00..0"
6703 * new = "00..0<new>00..0"
6709 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6711 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6716 /* Jump to end if we're done (likely?). */
6717 s390_emit_jump (csend, cc);
6719 /* Check for changes outside mode, and loop internally if so.
6720 Arrange the moves so that the compare is adjacent to the
6721 branch so that we can generate CRJ. */
6722 tmp = copy_to_reg (val);
6723 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6725 cc = s390_emit_compare (NE, val, tmp);
6726 s390_emit_jump (csloop, cc);
6729 emit_move_insn (btarget, const0_rtx);
6733 /* Return the correct part of the bitfield. */
6734 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6735 NULL_RTX, 1, OPTAB_DIRECT), 1);
6738 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6739 and VAL the value to play with. If AFTER is true then store the value
6740 MEM holds after the operation, if AFTER is false then store the value MEM
6741 holds before the operation. If TARGET is zero then discard that value, else
6742 store it to TARGET. */
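/* For illustration (an assumed example, not taken from this file),
   sub-word atomics such as

     short fetch_add (short *p, short v)
     { return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST); }

   can be expanded through this CS loop; note that NAND is encoded
   as MULT in CODE (see the switch below). */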
6745 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6746 rtx target, rtx mem, rtx val, bool after)
6748 struct alignment_context ac;
6750 rtx new_rtx = gen_reg_rtx (SImode);
6751 rtx orig = gen_reg_rtx (SImode);
6752 rtx_code_label *csloop = gen_label_rtx ();
6754 gcc_assert (!target || register_operand (target, VOIDmode));
6755 gcc_assert (MEM_P (mem));
6757 init_alignment_context (&ac, mem, mode);
6759 /* Shift val to the correct bit positions.
6760 Preserve "icm", but prevent "ex icm". */
6761 if (!(ac.aligned && code == SET && MEM_P (val)))
6762 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6764 /* Further preparation insns. */
6765 if (code == PLUS || code == MINUS)
6766 emit_move_insn (orig, val);
6767 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6768 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6769 NULL_RTX, 1, OPTAB_DIRECT);
6771 /* Load full word. Subsequent loads are performed by CS. */
6772 cmp = force_reg (SImode, ac.memsi);
6774 /* Start CS loop. */
6775 emit_label (csloop);
6776 emit_move_insn (new_rtx, cmp);
6778 /* Patch new with val at correct position. */
6783 val = expand_simple_binop (SImode, code, new_rtx, orig,
6784 NULL_RTX, 1, OPTAB_DIRECT);
6785 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6786 NULL_RTX, 1, OPTAB_DIRECT);
6789 if (ac.aligned && MEM_P (val))
6790 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6791 0, 0, SImode, val, false);
6794 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6795 NULL_RTX, 1, OPTAB_DIRECT);
6796 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6797 NULL_RTX, 1, OPTAB_DIRECT);
6803 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6804 NULL_RTX, 1, OPTAB_DIRECT);
6806 case MULT: /* NAND */
6807 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6808 NULL_RTX, 1, OPTAB_DIRECT);
6809 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6810 NULL_RTX, 1, OPTAB_DIRECT);
6816 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6817 ac.memsi, cmp, new_rtx));
6819 /* Return the correct part of the bitfield. */
6821 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6822 after ? new_rtx : cmp, ac.shift,
6823 NULL_RTX, 1, OPTAB_DIRECT), 1);
6826 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6827 We need to emit DTP-relative relocations. */
6829 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6832 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6837 fputs ("\t.long\t", file);
6840 fputs ("\t.quad\t", file);
6845 output_addr_const (file, x);
6846 fputs ("@DTPOFF", file);
6849 /* Return the proper mode for REGNO being represented in the dwarf unwind table. */
6852 s390_dwarf_frame_reg_mode (int regno)
6854 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6856 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6857 if (GENERAL_REGNO_P (regno))
6860 /* The rightmost 64 bits of vector registers are call-clobbered. */
6861 if (GET_MODE_SIZE (save_mode) > 8)
6867 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6868 /* Implement TARGET_MANGLE_TYPE. */
6871 s390_mangle_type (const_tree type)
6873 type = TYPE_MAIN_VARIANT (type);
6875 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6876 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6879 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6880 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6881 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6882 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6884 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6885 && TARGET_LONG_DOUBLE_128)
6888 /* For all other types, use normal C++ mangling. */
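/* Example (illustrative): a parameter of the vector bool char built-in
   type above therefore mangles as "U6__boolc" in the symbol name.  */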
6893 /* In the name of slightly smaller debug output, and to cater to
6894 general assembler lossage, recognize various UNSPEC sequences
6895 and turn them back into a direct symbol reference. */
6898 s390_delegitimize_address (rtx orig_x)
6902 orig_x = delegitimize_mem_from_attrs (orig_x);
6905 /* Extract the symbol ref from:
6906 (plus:SI (reg:SI 12 %r12)
6907 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6908 UNSPEC_GOTOFF/PLTOFF)))
6910 (plus:SI (reg:SI 12 %r12)
6911 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6912 UNSPEC_GOTOFF/PLTOFF)
6913 (const_int 4 [0x4])))) */
6914 if (GET_CODE (x) == PLUS
6915 && REG_P (XEXP (x, 0))
6916 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6917 && GET_CODE (XEXP (x, 1)) == CONST)
6919 HOST_WIDE_INT offset = 0;
6921 /* The const operand. */
6922 y = XEXP (XEXP (x, 1), 0);
6924 if (GET_CODE (y) == PLUS
6925 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6927 offset = INTVAL (XEXP (y, 1));
6931 if (GET_CODE (y) == UNSPEC
6932 && (XINT (y, 1) == UNSPEC_GOTOFF
6933 || XINT (y, 1) == UNSPEC_PLTOFF))
6934 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6937 if (GET_CODE (x) != MEM)
6941 if (GET_CODE (x) == PLUS
6942 && GET_CODE (XEXP (x, 1)) == CONST
6943 && GET_CODE (XEXP (x, 0)) == REG
6944 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6946 y = XEXP (XEXP (x, 1), 0);
6947 if (GET_CODE (y) == UNSPEC
6948 && XINT (y, 1) == UNSPEC_GOT)
6949 y = XVECEXP (y, 0, 0);
6953 else if (GET_CODE (x) == CONST)
6955 /* Extract the symbol ref from:
6956 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6957 UNSPEC_PLT/GOTENT))) */
6960 if (GET_CODE (y) == UNSPEC
6961 && (XINT (y, 1) == UNSPEC_GOTENT
6962 || XINT (y, 1) == UNSPEC_PLT))
6963 y = XVECEXP (y, 0, 0);
6970 if (GET_MODE (orig_x) != Pmode)
6972 if (GET_MODE (orig_x) == BLKmode)
6974 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6981 /* Output operand OP to stdio stream FILE.
6982 OP is an address (register + offset) which is not used to address data;
6983 instead the rightmost bits are interpreted as the value. */
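/* Example (illustrative): for (plus (reg %r3) (const_int 4)) this prints
   "4(%r3)"; a plain (const_int 12) prints just "12".  Only the rightmost
   twelve bits of the offset are used.  */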
6986 print_addrstyle_operand (FILE *file, rtx op)
6988 HOST_WIDE_INT offset;
6991 /* Extract base register and offset. */
6992 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
6998 gcc_assert (GET_CODE (base) == REG);
6999 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7000 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7003 /* Offsets are restricted to twelve bits. */
7004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7006 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7009 /* Assigns the number of NOP halfwords to be emitted before and after the
7010 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7011 If hotpatching is disabled for the function, the values are set to zero. */
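/* Example (illustrative): with
     void f (void) __attribute__ ((hotpatch (1, 2)));
   *HW_BEFORE becomes 1 and *HW_AFTER becomes 2, overriding any
   -mhotpatch= command-line default for this function.  */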
7014 static void
7015 s390_function_num_hotpatch_hw (tree decl,
7016 int *hw_before,
7017 int *hw_after)
7018 {
7019 tree attr;
7021 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7023 /* Handle the arguments of the hotpatch attribute. The values
7024 specified via attribute might override the cmdline argument
7028 tree args = TREE_VALUE (attr);
7030 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7031 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7035 /* Use the values specified by the cmdline arguments. */
7036 *hw_before = s390_hotpatch_hw_before_label;
7037 *hw_after = s390_hotpatch_hw_after_label;
7041 /* Write the current .machine and .machinemode specification to the assembler
7044 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7046 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7048 fprintf (asm_out_file, "\t.machinemode %s\n",
7049 (TARGET_ZARCH) ? "zarch" : "esa");
7050 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7051 if (S390_USE_ARCHITECTURE_MODIFIERS)
7055 cpu_flags = processor_flags_table[(int) s390_arch];
7056 if (TARGET_HTM && !(cpu_flags & PF_TX))
7057 fprintf (asm_out_file, "+htm");
7058 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7059 fprintf (asm_out_file, "+nohtm");
7060 if (TARGET_VX && !(cpu_flags & PF_VX))
7061 fprintf (asm_out_file, "+vx");
7062 else if (!TARGET_VX && (cpu_flags & PF_VX))
7063 fprintf (asm_out_file, "+novx");
7065 fprintf (asm_out_file, "\"\n");
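/* Example (illustrative): compiling with -march=zEC12 -mvx might print
       .machinemode zarch
       .machine "zEC12+vx"
   the "+vx" modifier being added because the zEC12 flags lack PF_VX.  */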
7068 /* Write an extra function header before the very start of the function. */
7071 s390_asm_output_function_prefix (FILE *asm_out_file,
7072 const char *fnname ATTRIBUTE_UNUSED)
7074 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7076 /* Since only the function specific options are saved but not the indications
7077 which options are set, it's too much work here to figure out which options
7078 have actually changed. Thus, generate .machine and .machinemode whenever a
7079 function has the target attribute or pragma. */
7080 fprintf (asm_out_file, "\t.machinemode push\n");
7081 fprintf (asm_out_file, "\t.machine push\n");
7082 s390_asm_output_machine_for_arch (asm_out_file);
7085 /* Write an extra function footer after the very end of the function. */
7088 s390_asm_declare_function_size (FILE *asm_out_file,
7089 const char *fnname, tree decl)
7091 if (!flag_inhibit_size_directive)
7092 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7093 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7095 fprintf (asm_out_file, "\t.machine pop\n");
7096 fprintf (asm_out_file, "\t.machinemode pop\n");
7100 /* Write the extra assembler code needed to declare a function properly. */
7103 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7106 int hw_before, hw_after;
7108 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7111 unsigned int function_alignment;
7114 /* Add a trampoline code area before the function label and initialize it
7115 with two-byte nop instructions. This area can be overwritten with code
7116 that jumps to a patched version of the function. */
7117 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7118 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7120 for (i = 1; i < hw_before; i++)
7121 fputs ("\tnopr\t%r0\n", asm_out_file);
7123 /* Note: The function label must be aligned so that (a) the bytes of the
7124 following nop do not cross a cacheline boundary, and (b) a jump address
7125 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7126 stored directly before the label without crossing a cacheline
7127 boundary. All this is necessary to make sure the trampoline code can
7128 be changed atomically.
7129 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7130 if there are NOPs before the function label, the alignment is placed
7131 before them. So it is necessary to duplicate the alignment after the
7132 NOPs. */
7133 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7134 if (! DECL_USER_ALIGN (decl))
7135 function_alignment = MAX (function_alignment,
7136 (unsigned int) align_functions);
7137 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7138 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7141 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7143 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7144 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7145 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7146 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7147 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7148 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7149 s390_warn_framesize);
7150 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7151 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7152 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7153 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7154 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7155 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7156 TARGET_PACKED_STACK);
7157 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7158 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7159 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7160 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7161 s390_warn_dynamicstack_p);
7163 ASM_OUTPUT_LABEL (asm_out_file, fname);
7165 asm_fprintf (asm_out_file,
7166 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7170 /* Output machine-dependent UNSPECs occurring in address constant X
7171 in assembler syntax to stdio stream FILE. Returns true if the
7172 constant X could be recognized, false otherwise. */
7175 s390_output_addr_const_extra (FILE *file, rtx x)
7177 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7178 switch (XINT (x, 1))
7179 {
7180 case UNSPEC_GOTENT:
7181 output_addr_const (file, XVECEXP (x, 0, 0));
7182 fprintf (file, "@GOTENT");
7183 return true;
7184 case UNSPEC_GOT:
7185 output_addr_const (file, XVECEXP (x, 0, 0));
7186 fprintf (file, "@GOT");
7187 return true;
7188 case UNSPEC_GOTOFF:
7189 output_addr_const (file, XVECEXP (x, 0, 0));
7190 fprintf (file, "@GOTOFF");
7191 return true;
7192 case UNSPEC_PLT:
7193 output_addr_const (file, XVECEXP (x, 0, 0));
7194 fprintf (file, "@PLT");
7195 return true;
7196 case UNSPEC_PLTOFF:
7197 output_addr_const (file, XVECEXP (x, 0, 0));
7198 fprintf (file, "@PLTOFF");
7199 return true;
7200 case UNSPEC_TLSGD:
7201 output_addr_const (file, XVECEXP (x, 0, 0));
7202 fprintf (file, "@TLSGD");
7203 return true;
7204 case UNSPEC_TLSLDM:
7205 assemble_name (file, get_some_local_dynamic_name ());
7206 fprintf (file, "@TLSLDM");
7207 return true;
7208 case UNSPEC_DTPOFF:
7209 output_addr_const (file, XVECEXP (x, 0, 0));
7210 fprintf (file, "@DTPOFF");
7211 return true;
7212 case UNSPEC_NTPOFF:
7213 output_addr_const (file, XVECEXP (x, 0, 0));
7214 fprintf (file, "@NTPOFF");
7215 return true;
7216 case UNSPEC_GOTNTPOFF:
7217 output_addr_const (file, XVECEXP (x, 0, 0));
7218 fprintf (file, "@GOTNTPOFF");
7219 return true;
7220 case UNSPEC_INDNTPOFF:
7221 output_addr_const (file, XVECEXP (x, 0, 0));
7222 fprintf (file, "@INDNTPOFF");
7223 return true;
7224 }
7226 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7227 switch (XINT (x, 1))
7229 case UNSPEC_POOL_OFFSET:
7230 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7231 output_addr_const (file, x);
7232 return true;
7233 }
7235 return false;
7236 }
7237 /* Output address operand ADDR in assembler syntax to
7238 stdio stream FILE. */
7241 print_operand_address (FILE *file, rtx addr)
7243 struct s390_address ad;
7244 memset (&ad, 0, sizeof (s390_address));
7246 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7250 output_operand_lossage ("symbolic memory references are "
7251 "only supported on z10 or later");
7254 output_addr_const (file, addr);
7258 if (!s390_decompose_address (addr, &ad)
7259 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7260 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7261 output_operand_lossage ("cannot decompose address");
7264 output_addr_const (file, ad.disp);
7266 fprintf (file, "0");
7268 if (ad.base && ad.indx)
7269 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7270 reg_names[REGNO (ad.base)]);
7272 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7275 /* Output operand X in assembler syntax to stdio stream FILE.
7276 CODE specified the format flag. The following format flags
7279 'C': print opcode suffix for branch condition.
7280 'D': print opcode suffix for inverse branch condition.
7281 'E': print opcode suffix for branch on index instruction.
7282 'G': print the size of the operand in bytes.
7283 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7284 'M': print the second word of a TImode operand.
7285 'N': print the second word of a DImode operand.
7286 'O': print only the displacement of a memory reference or address.
7287 'R': print only the base register of a memory reference or address.
7288 'S': print S-type memory reference (base+displacement).
7289 'Y': print address style operand without index (e.g. shift count or setmem
7292 'b': print integer X as if it's an unsigned byte.
7293 'c': print integer X as if it's a signed byte.
7294 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7295 'f': "end" contiguous bitmask X in SImode.
7296 'h': print integer X as if it's a signed halfword.
7297 'i': print the first nonzero HImode part of X.
7298 'j': print the first HImode part unequal to -1 of X.
7299 'k': print the first nonzero SImode part of X.
7300 'm': print the first SImode part unequal to -1 of X.
7301 'o': print integer X as if it's an unsigned 32-bit word.
7302 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7303 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7304 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7305 'x': print integer X as if it's an unsigned halfword.
7306 'v': print register number as vector register (v1 instead of f1). */
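/* Example (illustrative): for a DImode operand in the register pair
   starting at %r4, "%N0" in an insn template prints "%r5" (the second
   word), and "%G0" prints the operand size in bytes.  */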
7310 print_operand (FILE *file, rtx x, int code)
7317 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7321 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7325 if (GET_CODE (x) == LE)
7326 fprintf (file, "l");
7327 else if (GET_CODE (x) == GT)
7328 fprintf (file, "h");
7330 output_operand_lossage ("invalid comparison operator "
7331 "for 'E' output modifier");
7335 if (GET_CODE (x) == SYMBOL_REF)
7337 fprintf (file, "%s", ":tls_load:");
7338 output_addr_const (file, x);
7340 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7342 fprintf (file, "%s", ":tls_gdcall:");
7343 output_addr_const (file, XVECEXP (x, 0, 0));
7345 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7347 fprintf (file, "%s", ":tls_ldcall:");
7348 const char *name = get_some_local_dynamic_name ();
7350 assemble_name (file, name);
7353 output_operand_lossage ("invalid reference for 'J' output modifier");
7357 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7362 struct s390_address ad;
7365 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7368 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7371 output_operand_lossage ("invalid address for 'O' output modifier");
7376 output_addr_const (file, ad.disp);
7378 fprintf (file, "0");
7384 struct s390_address ad;
7387 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7390 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7393 output_operand_lossage ("invalid address for 'R' output modifier");
7398 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7400 fprintf (file, "0");
7406 struct s390_address ad;
7411 output_operand_lossage ("memory reference expected for "
7412 "'S' output modifier");
7415 ret = s390_decompose_address (XEXP (x, 0), &ad);
7418 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7421 output_operand_lossage ("invalid address for 'S' output modifier");
7426 output_addr_const (file, ad.disp);
7428 fprintf (file, "0");
7431 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7436 if (GET_CODE (x) == REG)
7437 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7438 else if (GET_CODE (x) == MEM)
7439 x = change_address (x, VOIDmode,
7440 plus_constant (Pmode, XEXP (x, 0), 4));
7442 output_operand_lossage ("register or memory expression expected "
7443 "for 'N' output modifier");
7447 if (GET_CODE (x) == REG)
7448 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7449 else if (GET_CODE (x) == MEM)
7450 x = change_address (x, VOIDmode,
7451 plus_constant (Pmode, XEXP (x, 0), 8));
7453 output_operand_lossage ("register or memory expression expected "
7454 "for 'M' output modifier");
7458 print_addrstyle_operand (file, x);
7462 switch (GET_CODE (x))
7465 /* Print FP regs as fx instead of vx when they are accessed
7466 through non-vector mode. */
7468 || VECTOR_NOFP_REG_P (x)
7469 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7470 || (VECTOR_REG_P (x)
7471 && (GET_MODE_SIZE (GET_MODE (x)) /
7472 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7473 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7475 fprintf (file, "%s", reg_names[REGNO (x)]);
7479 output_address (GET_MODE (x), XEXP (x, 0));
7486 output_addr_const (file, x);
7499 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7505 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7508 ival = s390_extract_part (x, HImode, 0);
7511 ival = s390_extract_part (x, HImode, -1);
7514 ival = s390_extract_part (x, SImode, 0);
7517 ival = s390_extract_part (x, SImode, -1);
7529 len = (code == 's' || code == 'e' ? 64 : 32);
7530 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7532 if (code == 's' || code == 't')
7539 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7541 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7544 case CONST_WIDE_INT:
7546 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7547 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7548 else if (code == 'x')
7549 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7550 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7551 else if (code == 'h')
7552 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7553 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7557 output_operand_lossage ("invalid constant - try using "
7558 "an output modifier");
7560 output_operand_lossage ("invalid constant for output modifier '%c'",
7568 gcc_assert (const_vec_duplicate_p (x));
7569 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7570 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7578 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7580 ival = (code == 's') ? start : end;
7581 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7587 bool ok = s390_bytemask_vector_p (x, &mask);
7589 fprintf (file, "%u", mask);
7594 output_operand_lossage ("invalid constant vector for output "
7595 "modifier '%c'", code);
7601 output_operand_lossage ("invalid expression - try using "
7602 "an output modifier");
7604 output_operand_lossage ("invalid expression for output "
7605 "modifier '%c'", code);
7610 /* Target hook for assembling integer objects. We need to define it
7611 here to work around a bug in some versions of GAS, which couldn't
7612 handle values smaller than INT_MIN when printed in decimal. */
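/* Example (illustrative): the value -2147483649 (one below INT_MIN) is
   emitted as "\t.quad\t0xffffffff7fffffff" instead of in decimal, which
   the affected GAS versions would reject.  */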
7615 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7617 if (size == 8 && aligned_p
7618 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7620 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7624 return default_assemble_integer (x, size, aligned_p);
7627 /* Returns true if register REGNO is used for forming
7628 a memory address in expression X. */
7631 reg_used_in_mem_p (int regno, rtx x)
7633 enum rtx_code code = GET_CODE (x);
7639 if (refers_to_regno_p (regno, XEXP (x, 0)))
7642 else if (code == SET
7643 && GET_CODE (SET_DEST (x)) == PC)
7645 if (refers_to_regno_p (regno, SET_SRC (x)))
7649 fmt = GET_RTX_FORMAT (code);
7650 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7653 && reg_used_in_mem_p (regno, XEXP (x, i)))
7656 else if (fmt[i] == 'E')
7657 for (j = 0; j < XVECLEN (x, i); j++)
7658 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7664 /* Returns true if expression DEP_RTX sets an address register
7665 used by instruction INSN to address memory. */
7668 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7672 if (NONJUMP_INSN_P (dep_rtx))
7673 dep_rtx = PATTERN (dep_rtx);
7675 if (GET_CODE (dep_rtx) == SET)
7677 target = SET_DEST (dep_rtx);
7678 if (GET_CODE (target) == STRICT_LOW_PART)
7679 target = XEXP (target, 0);
7680 while (GET_CODE (target) == SUBREG)
7681 target = SUBREG_REG (target);
7683 if (GET_CODE (target) == REG)
7685 int regno = REGNO (target);
7687 if (s390_safe_attr_type (insn) == TYPE_LA)
7689 pat = PATTERN (insn);
7690 if (GET_CODE (pat) == PARALLEL)
7692 gcc_assert (XVECLEN (pat, 0) == 2);
7693 pat = XVECEXP (pat, 0, 0);
7695 gcc_assert (GET_CODE (pat) == SET);
7696 return refers_to_regno_p (regno, SET_SRC (pat));
7698 else if (get_attr_atype (insn) == ATYPE_AGEN)
7699 return reg_used_in_mem_p (regno, PATTERN (insn));
7705 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
7708 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7710 rtx dep_rtx = PATTERN (dep_insn);
7713 if (GET_CODE (dep_rtx) == SET
7714 && addr_generation_dependency_p (dep_rtx, insn))
7716 else if (GET_CODE (dep_rtx) == PARALLEL)
7718 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7720 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7728 /* A C statement (sans semicolon) to update the integer scheduling priority
7729 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7730 reduce the priority to execute INSN later. Do not define this macro if
7731 you do not need to adjust the scheduling priorities of insns.
7733 A STD instruction should be scheduled earlier,
7734 in order to use the bypass. */
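/* Example (illustrative): a floating-point store (the STD case mentioned
   above) has its priority scaled by 8 (<< 3) below, an ordinary store by
   2 (<< 1), so both are issued ahead of unrelated insns that entered the
   ready list with the same original priority.  */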
7736 s390_adjust_priority (rtx_insn *insn, int priority)
7738 if (! INSN_P (insn))
7741 if (s390_tune <= PROCESSOR_2064_Z900)
7744 switch (s390_safe_attr_type (insn))
7748 priority = priority << 3;
7752 priority = priority << 1;
7761 /* The number of instructions that can be issued per cycle. */
7764 s390_issue_rate (void)
7768 case PROCESSOR_2084_Z990:
7769 case PROCESSOR_2094_Z9_109:
7770 case PROCESSOR_2094_Z9_EC:
7771 case PROCESSOR_2817_Z196:
7773 case PROCESSOR_2097_Z10:
7775 case PROCESSOR_9672_G5:
7776 case PROCESSOR_9672_G6:
7777 case PROCESSOR_2064_Z900:
7778 /* Starting with EC12 we use the sched_reorder hook to take care
7779 of instruction dispatch constraints. The algorithm only
7780 picks the best instruction and assumes only a single
7781 instruction gets issued per cycle. */
7782 case PROCESSOR_2827_ZEC12:
7783 case PROCESSOR_2964_Z13:
7790 s390_first_cycle_multipass_dfa_lookahead (void)
7795 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7796 Fix up MEMs as required. */
7799 annotate_constant_pool_refs (rtx *x)
7804 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7805 || !CONSTANT_POOL_ADDRESS_P (*x));
7807 /* Literal pool references can only occur inside a MEM ... */
7808 if (GET_CODE (*x) == MEM)
7810 rtx memref = XEXP (*x, 0);
7812 if (GET_CODE (memref) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (memref))
7815 rtx base = cfun->machine->base_reg;
7816 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7819 *x = replace_equiv_address (*x, addr);
7823 if (GET_CODE (memref) == CONST
7824 && GET_CODE (XEXP (memref, 0)) == PLUS
7825 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7826 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7827 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7829 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7830 rtx sym = XEXP (XEXP (memref, 0), 0);
7831 rtx base = cfun->machine->base_reg;
7832 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7835 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7840 /* ... or a load-address type pattern. */
7841 if (GET_CODE (*x) == SET)
7843 rtx addrref = SET_SRC (*x);
7845 if (GET_CODE (addrref) == SYMBOL_REF
7846 && CONSTANT_POOL_ADDRESS_P (addrref))
7848 rtx base = cfun->machine->base_reg;
7849 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7852 SET_SRC (*x) = addr;
7856 if (GET_CODE (addrref) == CONST
7857 && GET_CODE (XEXP (addrref, 0)) == PLUS
7858 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7859 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7860 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7862 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7863 rtx sym = XEXP (XEXP (addrref, 0), 0);
7864 rtx base = cfun->machine->base_reg;
7865 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7868 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7873 /* Annotate LTREL_BASE as well. */
7874 if (GET_CODE (*x) == UNSPEC
7875 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7877 rtx base = cfun->machine->base_reg;
7878 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7883 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7884 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7888 annotate_constant_pool_refs (&XEXP (*x, i));
7890 else if (fmt[i] == 'E')
7892 for (j = 0; j < XVECLEN (*x, i); j++)
7893 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7898 /* Split all branches that exceed the maximum distance.
7899 Returns true if this created a new literal pool entry. */
7902 s390_split_branches (void)
7904 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7905 int new_literal = 0, ret;
7910 /* We need correct insn addresses. */
7912 shorten_branches (get_insns ());
7914 /* Find all branches that exceed 64KB, and split them. */
7916 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7918 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7921 pat = PATTERN (insn);
7922 if (GET_CODE (pat) == PARALLEL)
7923 pat = XVECEXP (pat, 0, 0);
7924 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7927 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7929 label = &SET_SRC (pat);
7931 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7933 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7934 label = &XEXP (SET_SRC (pat), 1);
7935 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7936 label = &XEXP (SET_SRC (pat), 2);
7943 if (get_attr_length (insn) <= 4)
7946 /* We are going to use the return register as scratch register,
7947 make sure it will be saved/restored by the prologue/epilogue. */
7948 cfun_frame_layout.save_return_addr_p = 1;
7953 rtx mem = force_const_mem (Pmode, *label);
7954 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7956 INSN_ADDRESSES_NEW (set_insn, -1);
7957 annotate_constant_pool_refs (&PATTERN (set_insn));
7964 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7965 UNSPEC_LTREL_OFFSET);
7966 target = gen_rtx_CONST (Pmode, target);
7967 target = force_const_mem (Pmode, target);
7968 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7970 INSN_ADDRESSES_NEW (set_insn, -1);
7971 annotate_constant_pool_refs (&PATTERN (set_insn));
7973 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7974 cfun->machine->base_reg),
7976 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7979 ret = validate_change (insn, label, target, 0);
7987 /* Find an annotated literal pool symbol referenced in RTX X,
7988 and store it at REF. Will abort if X contains references to
7989 more than one such pool symbol; multiple references to the same
7990 symbol are allowed, however.
7992 The rtx pointed to by REF must be initialized to NULL_RTX
7993 by the caller before calling this routine. */
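/* Typical use (illustrative), mirroring the call sites further down:
     rtx pool_ref = NULL_RTX;
     find_constant_pool_ref (PATTERN (insn), &pool_ref);
     if (pool_ref)
       ... get_pool_constant (pool_ref), get_pool_mode (pool_ref) ...  */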
7996 find_constant_pool_ref (rtx x, rtx *ref)
8001 /* Ignore LTREL_BASE references. */
8002 if (GET_CODE (x) == UNSPEC
8003 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8005 /* Likewise POOL_ENTRY insns. */
8006 if (GET_CODE (x) == UNSPEC_VOLATILE
8007 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8010 gcc_assert (GET_CODE (x) != SYMBOL_REF
8011 || !CONSTANT_POOL_ADDRESS_P (x));
8013 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8015 rtx sym = XVECEXP (x, 0, 0);
8016 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8017 && CONSTANT_POOL_ADDRESS_P (sym));
8019 if (*ref == NULL_RTX)
8022 gcc_assert (*ref == sym);
8027 fmt = GET_RTX_FORMAT (GET_CODE (x));
8028 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8032 find_constant_pool_ref (XEXP (x, i), ref);
8034 else if (fmt[i] == 'E')
8036 for (j = 0; j < XVECLEN (x, i); j++)
8037 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8042 /* Replace every reference to the annotated literal pool
8043 symbol REF in X by its base plus OFFSET. */
8046 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8051 gcc_assert (*x != ref);
8053 if (GET_CODE (*x) == UNSPEC
8054 && XINT (*x, 1) == UNSPEC_LTREF
8055 && XVECEXP (*x, 0, 0) == ref)
8057 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8061 if (GET_CODE (*x) == PLUS
8062 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8063 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8064 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8065 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8067 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8068 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8072 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8073 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8077 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8079 else if (fmt[i] == 'E')
8081 for (j = 0; j < XVECLEN (*x, i); j++)
8082 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8087 /* Check whether X contains an UNSPEC_LTREL_BASE.
8088 Return its constant pool symbol if found, NULL_RTX otherwise. */
8091 find_ltrel_base (rtx x)
8096 if (GET_CODE (x) == UNSPEC
8097 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8098 return XVECEXP (x, 0, 0);
8100 fmt = GET_RTX_FORMAT (GET_CODE (x));
8101 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8105 rtx fnd = find_ltrel_base (XEXP (x, i));
8109 else if (fmt[i] == 'E')
8111 for (j = 0; j < XVECLEN (x, i); j++)
8113 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8123 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8126 replace_ltrel_base (rtx *x)
8131 if (GET_CODE (*x) == UNSPEC
8132 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8134 *x = XVECEXP (*x, 0, 1);
8138 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8139 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8143 replace_ltrel_base (&XEXP (*x, i));
8145 else if (fmt[i] == 'E')
8147 for (j = 0; j < XVECLEN (*x, i); j++)
8148 replace_ltrel_base (&XVECEXP (*x, i, j));
8154 /* We keep a list of constants which we have to add to internal
8155 constant tables in the middle of large functions. */
8157 #define NR_C_MODES 32
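/* Editorial note: the table below is ordered from most- to least-aligned
   mode; s390_dump_pool walks it front to back, which yields the
   "descending alignment requirement order" its comment relies on.  */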
8158 machine_mode constant_modes[NR_C_MODES] =
8160 TFmode, TImode, TDmode,
8161 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8162 V4SFmode, V2DFmode, V1TFmode,
8163 DFmode, DImode, DDmode,
8164 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8165 SFmode, SImode, SDmode,
8166 V4QImode, V2HImode, V1SImode, V1SFmode,
8167 HImode,
8168 V2QImode, V1HImode,
8169 QImode,
8170 V1QImode
8171 };
8173 struct constant
8174 {
8175 struct constant *next;
8176 rtx value;
8177 rtx_code_label *label;
8178 };
8180 struct constant_pool
8182 struct constant_pool *next;
8183 rtx_insn *first_insn;
8184 rtx_insn *pool_insn;
8185 bitmap insns;
8186 rtx_insn *emit_pool_after;
8188 struct constant *constants[NR_C_MODES];
8189 struct constant *execute;
8190 rtx_code_label *label;
8191 int size;
8192 };
8194 /* Allocate new constant_pool structure. */
8196 static struct constant_pool *
8197 s390_alloc_pool (void)
8199 struct constant_pool *pool;
8202 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8204 for (i = 0; i < NR_C_MODES; i++)
8205 pool->constants[i] = NULL;
8207 pool->execute = NULL;
8208 pool->label = gen_label_rtx ();
8209 pool->first_insn = NULL;
8210 pool->pool_insn = NULL;
8211 pool->insns = BITMAP_ALLOC (NULL);
8213 pool->emit_pool_after = NULL;
8218 /* Create new constant pool covering instructions starting at INSN
8219 and chain it to the end of POOL_LIST. */
8221 static struct constant_pool *
8222 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8224 struct constant_pool *pool, **prev;
8226 pool = s390_alloc_pool ();
8227 pool->first_insn = insn;
8229 for (prev = pool_list; *prev; prev = &(*prev)->next)
8236 /* End range of instructions covered by POOL at INSN and emit
8237 placeholder insn representing the pool. */
8240 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8242 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8245 insn = get_last_insn ();
8247 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8248 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8251 /* Add INSN to the list of insns covered by POOL. */
8254 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8256 bitmap_set_bit (pool->insns, INSN_UID (insn));
8259 /* Return pool out of POOL_LIST that covers INSN. */
8261 static struct constant_pool *
8262 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8264 struct constant_pool *pool;
8266 for (pool = pool_list; pool; pool = pool->next)
8267 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8273 /* Add constant VAL of mode MODE to the constant pool POOL. */
8276 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8281 for (i = 0; i < NR_C_MODES; i++)
8282 if (constant_modes[i] == mode)
8284 gcc_assert (i != NR_C_MODES);
8286 for (c = pool->constants[i]; c != NULL; c = c->next)
8287 if (rtx_equal_p (val, c->value))
8292 c = (struct constant *) xmalloc (sizeof *c);
8293 c->value = copy_rtx (val);
8294 c->label = gen_label_rtx ();
8295 c->next = pool->constants[i];
8296 pool->constants[i] = c;
8297 pool->size += GET_MODE_SIZE (mode);
8301 /* Return an rtx that represents the offset of X from the start of
8305 s390_pool_offset (struct constant_pool *pool, rtx x)
8309 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8310 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8311 UNSPEC_POOL_OFFSET);
8312 return gen_rtx_CONST (GET_MODE (x), x);
8315 /* Find constant VAL of mode MODE in the constant pool POOL.
8316 Return an RTX describing the distance from the start of
8317 the pool to the location of the new constant. */
8320 s390_find_constant (struct constant_pool *pool, rtx val,
8326 for (i = 0; i < NR_C_MODES; i++)
8327 if (constant_modes[i] == mode)
8329 gcc_assert (i != NR_C_MODES);
8331 for (c = pool->constants[i]; c != NULL; c = c->next)
8332 if (rtx_equal_p (val, c->value))
8337 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8340 /* Check whether INSN is an execute. Return the label_ref to its
8341 execute target template if so, NULL_RTX otherwise. */
8344 s390_execute_label (rtx insn)
8346 if (NONJUMP_INSN_P (insn)
8347 && GET_CODE (PATTERN (insn)) == PARALLEL
8348 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8349 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8350 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8355 /* Add execute target for INSN to the constant pool POOL. */
8358 s390_add_execute (struct constant_pool *pool, rtx insn)
8362 for (c = pool->execute; c != NULL; c = c->next)
8363 if (INSN_UID (insn) == INSN_UID (c->value))
8368 c = (struct constant *) xmalloc (sizeof *c);
8369 c->value = insn;
8370 c->label = gen_label_rtx ();
8371 c->next = pool->execute;
8372 pool->execute = c;
8373 pool->size += 6;
8377 /* Find execute target for INSN in the constant pool POOL.
8378 Return an RTX describing the distance from the start of
8379 the pool to the location of the execute target. */
8382 s390_find_execute (struct constant_pool *pool, rtx insn)
8386 for (c = pool->execute; c != NULL; c = c->next)
8387 if (INSN_UID (insn) == INSN_UID (c->value))
8392 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8395 /* For an execute INSN, extract the execute target template. */
8398 s390_execute_target (rtx insn)
8400 rtx pattern = PATTERN (insn);
8401 gcc_assert (s390_execute_label (insn));
8403 if (XVECLEN (pattern, 0) == 2)
8405 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8409 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8412 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8413 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8415 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8421 /* Indicate that INSN cannot be duplicated. This is the case for
8422 execute insns that carry a unique label. */
8425 s390_cannot_copy_insn_p (rtx_insn *insn)
8427 rtx label = s390_execute_label (insn);
8428 return label && label != const0_rtx;
8431 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8432 do not emit the pool base label. */
8435 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8438 rtx_insn *insn = pool->pool_insn;
8441 /* Switch to rodata section. */
8442 if (TARGET_CPU_ZARCH)
8444 insn = emit_insn_after (gen_pool_section_start (), insn);
8445 INSN_ADDRESSES_NEW (insn, -1);
8448 /* Ensure minimum pool alignment. */
8449 if (TARGET_CPU_ZARCH)
8450 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8452 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8453 INSN_ADDRESSES_NEW (insn, -1);
8455 /* Emit pool base label. */
8458 insn = emit_label_after (pool->label, insn);
8459 INSN_ADDRESSES_NEW (insn, -1);
8462 /* Dump constants in descending alignment requirement order,
8463 ensuring proper alignment for every constant. */
8464 for (i = 0; i < NR_C_MODES; i++)
8465 for (c = pool->constants[i]; c; c = c->next)
8467 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8468 rtx value = copy_rtx (c->value);
8469 if (GET_CODE (value) == CONST
8470 && GET_CODE (XEXP (value, 0)) == UNSPEC
8471 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8472 && XVECLEN (XEXP (value, 0), 0) == 1)
8473 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8475 insn = emit_label_after (c->label, insn);
8476 INSN_ADDRESSES_NEW (insn, -1);
8478 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8479 gen_rtvec (1, value),
8480 UNSPECV_POOL_ENTRY);
8481 insn = emit_insn_after (value, insn);
8482 INSN_ADDRESSES_NEW (insn, -1);
8485 /* Ensure minimum alignment for instructions. */
8486 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8487 INSN_ADDRESSES_NEW (insn, -1);
8489 /* Output in-pool execute template insns. */
8490 for (c = pool->execute; c; c = c->next)
8492 insn = emit_label_after (c->label, insn);
8493 INSN_ADDRESSES_NEW (insn, -1);
8495 insn = emit_insn_after (s390_execute_target (c->value), insn);
8496 INSN_ADDRESSES_NEW (insn, -1);
8499 /* Switch back to previous section. */
8500 if (TARGET_CPU_ZARCH)
8502 insn = emit_insn_after (gen_pool_section_end (), insn);
8503 INSN_ADDRESSES_NEW (insn, -1);
8506 insn = emit_barrier_after (insn);
8507 INSN_ADDRESSES_NEW (insn, -1);
8509 /* Remove placeholder insn. */
8510 remove_insn (pool->pool_insn);
8513 /* Free all memory used by POOL. */
8516 s390_free_pool (struct constant_pool *pool)
8518 struct constant *c, *next;
8521 for (i = 0; i < NR_C_MODES; i++)
8522 for (c = pool->constants[i]; c; c = next)
8528 for (c = pool->execute; c; c = next)
8534 BITMAP_FREE (pool->insns);
8539 /* Collect main literal pool. Return NULL on overflow. */
8541 static struct constant_pool *
8542 s390_mainpool_start (void)
8544 struct constant_pool *pool;
8547 pool = s390_alloc_pool ();
8549 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8551 if (NONJUMP_INSN_P (insn)
8552 && GET_CODE (PATTERN (insn)) == SET
8553 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8554 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8556 /* There might be two main_pool instructions if base_reg
8557 is call-clobbered; one for shrink-wrapped code and one
8558 for the rest. We want to keep the first. */
8559 if (pool->pool_insn)
8561 insn = PREV_INSN (insn);
8562 delete_insn (NEXT_INSN (insn));
8565 pool->pool_insn = insn;
8568 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8570 s390_add_execute (pool, insn);
8572 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8574 rtx pool_ref = NULL_RTX;
8575 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8578 rtx constant = get_pool_constant (pool_ref);
8579 machine_mode mode = get_pool_mode (pool_ref);
8580 s390_add_constant (pool, constant, mode);
8584 /* If hot/cold partitioning is enabled we have to make sure that
8585 the literal pool is emitted in the same section where the
8586 initialization of the literal pool base pointer takes place.
8587 emit_pool_after is only used in the non-overflow case on
8588 non-z CPUs where we can emit the literal pool at the end of the
8589 function body within the text section. */
8591 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8592 && !pool->emit_pool_after)
8593 pool->emit_pool_after = PREV_INSN (insn);
8596 gcc_assert (pool->pool_insn || pool->size == 0);
8598 if (pool->size >= 4096)
8600 /* We're going to chunkify the pool, so remove the main
8601 pool placeholder insn. */
8602 remove_insn (pool->pool_insn);
8604 s390_free_pool (pool);
8608 /* If the function ends with the section where the literal pool
8609 should be emitted set the marker to its end. */
8610 if (pool && !pool->emit_pool_after)
8611 pool->emit_pool_after = get_last_insn ();
8616 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8617 Modify the current function to output the pool constants as well as
8618 the pool register setup instruction. */
8621 s390_mainpool_finish (struct constant_pool *pool)
8623 rtx base_reg = cfun->machine->base_reg;
8625 /* If the pool is empty, we're done. */
8626 if (pool->size == 0)
8628 /* We don't actually need a base register after all. */
8629 cfun->machine->base_reg = NULL_RTX;
8631 if (pool->pool_insn)
8632 remove_insn (pool->pool_insn);
8633 s390_free_pool (pool);
8637 /* We need correct insn addresses. */
8638 shorten_branches (get_insns ());
8640 /* On zSeries, we use a LARL to load the pool register. The pool is
8641 located in the .rodata section, so we emit it after the function. */
8642 if (TARGET_CPU_ZARCH)
8644 rtx set = gen_main_base_64 (base_reg, pool->label);
8645 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8646 INSN_ADDRESSES_NEW (insn, -1);
8647 remove_insn (pool->pool_insn);
8649 insn = get_last_insn ();
8650 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8651 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8653 s390_dump_pool (pool, 0);
8656 /* On S/390, if the total size of the function's code plus literal pool
8657 does not exceed 4096 bytes, we use BASR to set up a function base
8658 pointer, and emit the literal pool at the end of the function. */
8659 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8660 + pool->size + 8 /* alignment slop */ < 4096)
8662 rtx set = gen_main_base_31_small (base_reg, pool->label);
8663 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8664 INSN_ADDRESSES_NEW (insn, -1);
8665 remove_insn (pool->pool_insn);
8667 insn = emit_label_after (pool->label, insn);
8668 INSN_ADDRESSES_NEW (insn, -1);
8670 /* emit_pool_after will be set by s390_mainpool_start to the
8671 last insn of the section where the literal pool should be
8673 insn = pool->emit_pool_after;
8675 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8676 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8678 s390_dump_pool (pool, 1);
8681 /* Otherwise, we emit an inline literal pool and use BASR to branch
8682 over it, setting up the pool register at the same time. */
8685 rtx_code_label *pool_end = gen_label_rtx ();
8687 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8688 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8689 JUMP_LABEL (insn) = pool_end;
8690 INSN_ADDRESSES_NEW (insn, -1);
8691 remove_insn (pool->pool_insn);
8693 insn = emit_label_after (pool->label, insn);
8694 INSN_ADDRESSES_NEW (insn, -1);
8696 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8697 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8699 insn = emit_label_after (pool_end, pool->pool_insn);
8700 INSN_ADDRESSES_NEW (insn, -1);
8702 s390_dump_pool (pool, 1);
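/* Editorial summary of the three layouts above (approximate):
     zarch:          larl-based base; pool emitted after the function.
     31 bit, small:  basr-based base; pool at the end of the body.
     31 bit, large:  basr-based base plus a branch over an inline pool.  */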
8706 /* Replace all literal pool references. */
8708 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8711 replace_ltrel_base (&PATTERN (insn));
8713 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8715 rtx addr, pool_ref = NULL_RTX;
8716 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8719 if (s390_execute_label (insn))
8720 addr = s390_find_execute (pool, insn);
8722 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8723 get_pool_mode (pool_ref));
8725 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8726 INSN_CODE (insn) = -1;
8732 /* Free the pool. */
8733 s390_free_pool (pool);
8736 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8737 We have decided we cannot use this pool, so revert all changes
8738 to the current function that were done by s390_mainpool_start. */
8740 s390_mainpool_cancel (struct constant_pool *pool)
8742 /* We didn't actually change the instruction stream, so simply
8743 free the pool memory. */
8744 s390_free_pool (pool);
8748 /* Chunkify the literal pool. */
8750 #define S390_POOL_CHUNK_MIN 0xc00
8751 #define S390_POOL_CHUNK_MAX 0xe00
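/* Editorial note: both thresholds stay well below the 4 KB displacement
   range of base+displacement addressing (cf. the 4096 check in
   s390_mainpool_start), leaving headroom for alignment padding and the
   base-register reload insns inserted below.  */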
8753 static struct constant_pool *
8754 s390_chunkify_start (void)
8756 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8759 rtx pending_ltrel = NULL_RTX;
8762 rtx (*gen_reload_base) (rtx, rtx) =
8763 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8766 /* We need correct insn addresses. */
8768 shorten_branches (get_insns ());
8770 /* Scan all insns and move literals to pool chunks. */
8772 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8774 bool section_switch_p = false;
8776 /* Check for pending LTREL_BASE. */
8779 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8782 gcc_assert (ltrel_base == pending_ltrel);
8783 pending_ltrel = NULL_RTX;
8787 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8790 curr_pool = s390_start_pool (&pool_list, insn);
8792 s390_add_execute (curr_pool, insn);
8793 s390_add_pool_insn (curr_pool, insn);
8795 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8797 rtx pool_ref = NULL_RTX;
8798 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8801 rtx constant = get_pool_constant (pool_ref);
8802 machine_mode mode = get_pool_mode (pool_ref);
8805 curr_pool = s390_start_pool (&pool_list, insn);
8807 s390_add_constant (curr_pool, constant, mode);
8808 s390_add_pool_insn (curr_pool, insn);
8810 /* Don't split the pool chunk between a LTREL_OFFSET load
8811 and the corresponding LTREL_BASE. */
8812 if (GET_CODE (constant) == CONST
8813 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8814 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8816 gcc_assert (!pending_ltrel);
8817 pending_ltrel = pool_ref;
8822 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8825 s390_add_pool_insn (curr_pool, insn);
8826 /* An LTREL_BASE must follow within the same basic block. */
8827 gcc_assert (!pending_ltrel);
8831 switch (NOTE_KIND (insn))
8833 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8834 section_switch_p = true;
8836 case NOTE_INSN_VAR_LOCATION:
8837 case NOTE_INSN_CALL_ARG_LOCATION:
8844 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8845 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8848 if (TARGET_CPU_ZARCH)
8850 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8853 s390_end_pool (curr_pool, NULL);
8858 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8859 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8862 /* We will later have to insert base register reload insns.
8863 Those will have an effect on code size, which we need to
8864 consider here. This calculation makes rather pessimistic
8865 worst-case assumptions. */
8869 if (chunk_size < S390_POOL_CHUNK_MIN
8870 && curr_pool->size < S390_POOL_CHUNK_MIN
8871 && !section_switch_p)
8874 /* Pool chunks can only be inserted after BARRIERs ... */
8875 if (BARRIER_P (insn))
8877 s390_end_pool (curr_pool, insn);
8882 /* ... so if we don't find one in time, create one. */
8883 else if (chunk_size > S390_POOL_CHUNK_MAX
8884 || curr_pool->size > S390_POOL_CHUNK_MAX
8885 || section_switch_p)
8887 rtx_insn *label, *jump, *barrier, *next, *prev;
8889 if (!section_switch_p)
8891 /* We can insert the barrier only after a 'real' insn. */
8892 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8894 if (get_attr_length (insn) == 0)
8896 /* Don't separate LTREL_BASE from the corresponding
8897 LTREL_OFFSET load. */
8904 next = NEXT_INSN (insn);
8908 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8909 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8913 gcc_assert (!pending_ltrel);
8915 /* The old pool has to end before the section switch
8916 note in order to make it part of the current
8918 insn = PREV_INSN (insn);
8921 label = gen_label_rtx ();
8923 if (prev && NOTE_P (prev))
8924 prev = prev_nonnote_insn (prev);
8926 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8927 INSN_LOCATION (prev));
8929 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8930 barrier = emit_barrier_after (jump);
8931 insn = emit_label_after (label, barrier);
8932 JUMP_LABEL (jump) = label;
8933 LABEL_NUSES (label) = 1;
8935 INSN_ADDRESSES_NEW (jump, -1);
8936 INSN_ADDRESSES_NEW (barrier, -1);
8937 INSN_ADDRESSES_NEW (insn, -1);
8939 s390_end_pool (curr_pool, barrier);
8947 s390_end_pool (curr_pool, NULL);
8948 gcc_assert (!pending_ltrel);
8950 /* Find all labels that are branched into
8951 from an insn belonging to a different chunk. */
8953 far_labels = BITMAP_ALLOC (NULL);
8955 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8957 rtx_jump_table_data *table;
8959 /* Labels marked with LABEL_PRESERVE_P can be target
8960 of non-local jumps, so we have to mark them.
8961 The same holds for named labels.
8963 Don't do that, however, if it is the label before
8964 a jump table. */
8966 if (LABEL_P (insn)
8967 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8969 rtx_insn *vec_insn = NEXT_INSN (insn);
8970 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8971 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8973 /* Check potential targets in a table jump (casesi_jump). */
8974 else if (tablejump_p (insn, NULL, &table))
8976 rtx vec_pat = PATTERN (table);
8977 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8979 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8981 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8983 if (s390_find_pool (pool_list, label)
8984 != s390_find_pool (pool_list, insn))
8985 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8988 /* If we have a direct jump (conditional or unconditional),
8989 check all potential targets. */
8990 else if (JUMP_P (insn))
8992 rtx pat = PATTERN (insn);
8994 if (GET_CODE (pat) == PARALLEL)
8995 pat = XVECEXP (pat, 0, 0);
8997 if (GET_CODE (pat) == SET)
8999 rtx label = JUMP_LABEL (insn);
9000 if (label && !ANY_RETURN_P (label))
9002 if (s390_find_pool (pool_list, label)
9003 != s390_find_pool (pool_list, insn))
9004 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9010 /* Insert base register reload insns before every pool. */
9012 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9014 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9016 rtx_insn *insn = curr_pool->first_insn;
9017 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9020 /* Insert base register reload insns at every far label. */
9022 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9024 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9026 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9029 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9031 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9036 BITMAP_FREE (far_labels);
9039 /* Recompute insn addresses. */
9041 init_insn_lengths ();
9042 shorten_branches (get_insns ());
9047 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9048 After we have decided to use this list, finish implementing
9049 all changes to the current function as required. */
9052 s390_chunkify_finish (struct constant_pool *pool_list)
9054 struct constant_pool *curr_pool = NULL;
9058 /* Replace all literal pool references. */
9060 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9063 replace_ltrel_base (&PATTERN (insn));
9065 curr_pool = s390_find_pool (pool_list, insn);
9069 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9071 rtx addr, pool_ref = NULL_RTX;
9072 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9075 if (s390_execute_label (insn))
9076 addr = s390_find_execute (curr_pool, insn);
9078 addr = s390_find_constant (curr_pool,
9079 get_pool_constant (pool_ref),
9080 get_pool_mode (pool_ref));
9082 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9083 INSN_CODE (insn) = -1;
9088 /* Dump out all literal pools. */
9090 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9091 s390_dump_pool (curr_pool, 0);
9093 /* Free pool list. */
9097 struct constant_pool *next = pool_list->next;
9098 s390_free_pool (pool_list);
9103 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9104 We have decided we cannot use this list, so revert all changes
9105 to the current function that were done by s390_chunkify_start. */
9108 s390_chunkify_cancel (struct constant_pool *pool_list)
9110 struct constant_pool *curr_pool = NULL;
9113 /* Remove all pool placeholder insns. */
9115 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9117 /* Did we insert an extra barrier? Remove it. */
9118 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9119 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9120 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9122 if (jump && JUMP_P (jump)
9123 && barrier && BARRIER_P (barrier)
9124 && label && LABEL_P (label)
9125 && GET_CODE (PATTERN (jump)) == SET
9126 && SET_DEST (PATTERN (jump)) == pc_rtx
9127 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9128 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9131 remove_insn (barrier);
9132 remove_insn (label);
9135 remove_insn (curr_pool->pool_insn);
9138 /* Remove all base register reload insns. */
9140 for (insn = get_insns (); insn; )
9142 rtx_insn *next_insn = NEXT_INSN (insn);
9144 if (NONJUMP_INSN_P (insn)
9145 && GET_CODE (PATTERN (insn)) == SET
9146 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9147 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9153 /* Free pool list. */
9157 struct constant_pool *next = pool_list->next;
9158 s390_free_pool (pool_list);
9163 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9166 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9168 switch (GET_MODE_CLASS (mode))
9171 case MODE_DECIMAL_FLOAT:
9172 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9174 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9178 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9179 mark_symbol_refs_as_used (exp);
9182 case MODE_VECTOR_INT:
9183 case MODE_VECTOR_FLOAT:
9186 machine_mode inner_mode;
9187 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9189 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9190 for (i = 0; i < XVECLEN (exp, 0); i++)
9191 s390_output_pool_entry (XVECEXP (exp, 0, i),
9195 : GET_MODE_BITSIZE (inner_mode));
9205 /* Return an RTL expression representing the value of the return address
9206 for the frame COUNT steps up from the current frame. FRAME is the
9207 frame pointer of that frame. */
9210 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9215 /* Without backchain, we fail for all but the current frame. */
9217 if (!TARGET_BACKCHAIN && count > 0)
9220 /* For the current frame, we need to make sure the initial
9221 value of RETURN_REGNUM is actually saved. */
9225 /* On non-z architectures branch splitting could overwrite r14. */
9226 if (TARGET_CPU_ZARCH)
9227 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9230 cfun_frame_layout.save_return_addr_p = true;
9231 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9235 if (TARGET_PACKED_STACK)
9236 offset = -2 * UNITS_PER_LONG;
9238 offset = RETURN_REGNUM * UNITS_PER_LONG;
9240 addr = plus_constant (Pmode, frame, offset);
9241 addr = memory_address (Pmode, addr);
9242 return gen_rtx_MEM (Pmode, addr);
9245 /* Return an RTL expression representing the back chain stored in
9246 the current stack frame. */
9249 s390_back_chain_rtx (void)
9253 gcc_assert (TARGET_BACKCHAIN);
9255 if (TARGET_PACKED_STACK)
9256 chain = plus_constant (Pmode, stack_pointer_rtx,
9257 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9259 chain = stack_pointer_rtx;
9261 chain = gen_rtx_MEM (Pmode, chain);
9265 /* Find first call clobbered register unused in a function.
9266 This could be used as base register in a leaf function
9267 or for holding the return address before epilogue. */
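/* Example (illustrative): in a leaf function whose body only ever touches
   %r1 and %r2, the loop below returns 0, the first of the six
   call-clobbered GPRs that dataflow has not marked live.  */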
9270 find_unused_clobbered_reg (void)
9273 for (i = 0; i < 6; i++)
9274 if (!df_regs_ever_live_p (i))
/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for
   all clobbered hard regs in SETREG.  */

static void
s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
{
  char *regs_ever_clobbered = (char *)data;
  unsigned int i, regno;
  machine_mode mode = GET_MODE (setreg);

  if (GET_CODE (setreg) == SUBREG)
    {
      rtx inner = SUBREG_REG (setreg);
      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
	return;
      regno = subreg_regno (setreg);
    }
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  for (i = regno;
       i < regno + HARD_REGNO_NREGS (regno, mode);
       i++)
    regs_ever_clobbered[i] = 1;
}

/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed char array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */

static void
s390_regs_ever_clobbered (char regs_ever_clobbered[])
{
  basic_block cur_bb;
  rtx_insn *cur_insn;
  unsigned int i;

  memset (regs_ever_clobbered, 0, 32);

  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    for (i = 0; i < 32; i++)
      regs_ever_clobbered[i] = call_really_used_regs[i];

  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
     this work is done by liveness analysis (mark_regs_live_at_end).
     Special care is needed for functions containing landing pads.  Landing pads
     may use the eh registers, but the code which sets these registers is not
     contained in that function.  Hence s390_regs_ever_clobbered is not able to
     deal with this automatically.  */
  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
      if (crtl->calls_eh_return
	  || (cfun->machine->has_landing_pad_p
	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;

  /* For nonlocal gotos all call-saved registers have to be saved.
     This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
  if (crtl->saves_all_registers)
    for (i = 0; i < 32; i++)
      if (!call_really_used_regs[i])
	regs_ever_clobbered[i] = 1;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      FOR_BB_INSNS (cur_bb, cur_insn)
	{
	  rtx pat;

	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;

		  /* l / lg  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}

	      /* lm / lmg */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (pat,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
	}
    }
}

/* Determine the frame area which actually has to be accessed
   in the function epilogue.  The values are stored at the
   given pointers AREA_BOTTOM (address of the lowest used stack
   address) and AREA_TOP (address of the first item which does
   not belong to the stack frame).  */

static void
s390_frame_area (int *area_bottom, int *area_top)
{
  int b, t;

  b = INT_MAX;
  t = INT_MIN;

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      b = (cfun_frame_layout.gprs_offset
	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
      t = b + (cfun_frame_layout.last_restore_gpr
	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
    }

  if (TARGET_64BIT && cfun_save_high_fprs_p)
    {
      b = MIN (b, cfun_frame_layout.f8_offset);
      t = MAX (t, (cfun_frame_layout.f8_offset
		   + cfun_frame_layout.high_fprs * 8));
    }

  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }
  *area_bottom = b;
  *area_top = t;
}

/* Update gpr_save_slots in the frame layout trying to make use of
   FPRs as GPR save slots.
   This is a helper routine of s390_register_info.  */

static void
s390_register_info_gprtofpr ()
{
  int save_reg_slot = FPR0_REGNUM;
  int i, j;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 15; i >= 6; i--)
    {
      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_really_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
	  break;
	}
      cfun_gpr_save_slot (i) = save_reg_slot++;
    }
}

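/* Illustration: in a leaf function on z10 or newer (the TARGET_Z10
   check above) that clobbers only r11 and r12, both GPRs can live in
   otherwise unused call-clobbered FPRs, e.g. "ldgr %f0,%r11", so no
   stm/lm and no stack save slots are needed at all.  */
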
/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_fpr ()
{
  int i;
  int min_fpr;
  int max_fpr;

  /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
     f0-f4 for 64 bit.  */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;

  min_fpr = crtl->args.info.fprs;
  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
  if (max_fpr >= FP_ARG_NUM_REG)
    max_fpr = FP_ARG_NUM_REG - 1;

  /* FPR argument regs start at f0.  */
  min_fpr += FPR0_REGNUM;
  max_fpr += FPR0_REGNUM;

  for (i = min_fpr; i <= max_fpr; i++)
    cfun_set_fpr_save (i);
}

/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}

/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as save to stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}

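/* Example: if r6, r7 and r14 got stack slots, i becomes 6 and j 14,
   so prologue and epilogue use a single stm(g)/lm(g) covering r6-r14
   even though r8-r13 would not strictly need saving; contiguous
   ranges are what the store/load-multiple instructions require.  */
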
/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_really_used_regs[i])
      {
	cfun_set_fpr_save (i);
	if (i >= FPR8_REGNUM)
	  cfun_frame_layout.high_fprs++;
      }

  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	|| (flag_split_stack && cfun->stdarg
	    && (crtl->is_leaf || TARGET_TPF_PROFILING
		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun->machine->split_branches_pending_p
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}

/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);
  gcc_assert (!cfun->machine->split_branches_pending_p);

  s390_regs_ever_clobbered (clobbered_regs);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
	/* When expanding builtin_return_addr in ESA mode we do not
	   know whether r14 will later be needed as scratch reg when
	   doing branch splitting.  So the builtin always accesses the
	   r14 save slot and we need to stick to the save/restore
	   decision for r14 even if it turns out that it didn't get
	   clobbered.  */
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}

/* Fill cfun->machine with info about frame of current function.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
		 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
	 save slot must not change.  So we always need a slot for the
	 stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
	 keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
	cfun_frame_layout.f4_offset =
	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR4_REGNUM) +
		     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR0_REGNUM)
		     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
				     - cfun_frame_layout.high_fprs * 8);
    }

  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}

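/* Rough sketch of the resulting default (non-packed) 64-bit layout,
   offsets relative to the incoming stack pointer: the GPR slots start
   at first_save_gpr_slot * 8 within the 160 byte register save area,
   f0/f2 at 128, f4/f6 at 144, while f8-f15 (if saved) are placed
   below the save area and paid for by growing frame_size.  */
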
/* Generate frame layout.  Fills in register and frame data for the current
   function in cfun->machine.  This routine can be called multiple times;
   it will re-do the complete frame layout every time.  */

static void
s390_init_frame_layout (void)
{
  HOST_WIDE_INT frame_size;
  int base_used;

  /* After LRA the frame layout is supposed to be read-only and should
     not be re-computed.  */
  if (reload_completed)
    return;

  /* On S/390 machines, we may need to perform branch splitting, which
     will require both base and return address register.  We have no
     choice but to assume we're going to need them until right at the
     end of the machine dependent reorg phase.  */
  if (!TARGET_CPU_ZARCH)
    cfun->machine->split_branches_pending_p = true;

  do
    {
      frame_size = cfun_frame_layout.frame_size;

      /* Try to predict whether we'll need the base register.  */
      base_used = cfun->machine->split_branches_pending_p
		  || crtl->uses_const_pool
		  || (!DISP_IN_RANGE (frame_size)
		      && !CONST_OK_FOR_K (frame_size));

      /* Decide which register to use as literal pool base.  In small
	 leaf functions, try to use an unused call-clobbered register
	 as base register to avoid save/restore overhead.  */
      if (!base_used)
	cfun->machine->base_reg = NULL_RTX;
      else
	{
	  int br = 0;

	  if (crtl->is_leaf)
	    /* Prefer r5 (most likely to be free).  */
	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
	      ;
	  cfun->machine->base_reg =
	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
	}

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
}

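/* The do/while above iterates to a fixed point: deciding that a base
   register is needed makes BASE_REGNUM (r13) live, which can enlarge
   the GPR save range and hence frame_size, which in turn can move
   frame_size out of DISP_IN_RANGE and require the base register.
   The loop stops once frame_size no longer changes.  */
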
/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - when CC is copied to a GPR and the CC0 check is done with the GPR
     this is not supported
*/

static void
s390_optimize_nonescaping_tx (void)
{
  const unsigned int CC0 = 1 << 3;
  basic_block tbegin_bb = NULL;
  basic_block tend_bb = NULL;
  basic_block bb;
  rtx_insn *insn;
  bool result = true;
  int bb_index;
  rtx_insn *tbegin_insn = NULL;

  if (!cfun->machine->tbegin_p)
    return;

  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

      if (!bb)
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx ite, cc, pat, target;
	  unsigned HOST_WIDE_INT mask;

	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) != SET
	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
	    continue;

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
	    {
	      rtx_insn *tmp;

	      tbegin_insn = insn;

	      /* Just return if the tbegin doesn't have clobbers.  */
	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
		return;

	      if (tbegin_bb != NULL)
		return;

	      /* Find the next conditional jump.  */
	      for (tmp = NEXT_INSN (insn);
		   tmp != NULL_RTX;
		   tmp = NEXT_INSN (tmp))
		{
		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
		    return;
		  if (!JUMP_P (tmp))
		    continue;

		  ite = SET_SRC (PATTERN (tmp));
		  if (GET_CODE (ite) != IF_THEN_ELSE)
		    return;

		  cc = XEXP (XEXP (ite, 0), 0);
		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
		      || GET_MODE (cc) != CCRAWmode
		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
		    return;

		  if (bb->succs->length () != 2)
		    return;

		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
		  if (GET_CODE (XEXP (ite, 0)) == NE)
		    mask ^= 0xf;

		  if (mask == CC0)
		    target = XEXP (ite, 1);
		  else if (mask == (CC0 ^ 0xf))
		    target = XEXP (ite, 2);
		  else
		    return;

		  {
		    edge_iterator ei;
		    edge e1, e2;

		    ei = ei_start (bb->succs);
		    e1 = ei_safe_edge (ei);
		    ei_next (&ei);
		    e2 = ei_safe_edge (ei);

		    if (e2->flags & EDGE_FALLTHRU)
		      {
			e2 = e1;
			e1 = ei_safe_edge (ei);
		      }

		    if (!(e1->flags & EDGE_FALLTHRU))
		      return;

		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
		  }
		  if (tmp == BB_END (bb))
		    break;
		}
	    }

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
	    {
	      if (tend_bb != NULL)
		return;
	      tend_bb = bb;
	    }
	}
    }

  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;

  if (tbegin_bb == NULL || tend_bb == NULL)
    return;

  calculate_dominance_info (CDI_POST_DOMINATORS);
  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (!result)
    return;

  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (2,
				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
  INSN_CODE (tbegin_insn) = -1;
  df_insn_rescan (tbegin_insn);
}

/* Return true if it is legal to put a value with MODE into REGNO.  */

bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
    return false;

  switch (REGNO_REG_CLASS (regno))
    {
    case VEC_REGS:
      return ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode));
      break;
    case FP_REGS:
      if (TARGET_VX
	  && ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (FP_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode)))
	return true;

      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == DImode)
	    return true;

	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	    return true;
	}
      break;
    case ADDR_REGS:
      if (FRAME_REGNO_P (regno) && mode == Pmode)
	return true;

      /* fallthrough */
    case GENERAL_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (TARGET_ZARCH
	      || (mode != TFmode && mode != TCmode && mode != TDmode))
	    return true;
	}
      break;
    case CC_REGS:
      if (GET_MODE_CLASS (mode) == MODE_CC)
	return true;
      break;
    case ACCESS_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == Pmode)
	    return true;
	}
      break;
    default:
      return false;
    }

  return false;
}

/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

bool
s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
{
  /* Once we've decided upon a register to use as base register, it must
     no longer be used for any other purpose.  */
  if (cfun->machine->base_reg)
    if (REGNO (cfun->machine->base_reg) == old_reg
	|| REGNO (cfun->machine->base_reg) == new_reg)
      return false;

  /* Prevent regrename from using call-saved regs which haven't
     actually been saved.  This is necessary since regrename assumes
     the backend save/restore decisions are based on
     df_regs_ever_live.  Since we have our own routine we have to tell
     regrename manually about it.  */
  if (GENERAL_REGNO_P (new_reg)
      && !call_really_used_regs[new_reg]
      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
s390_hard_regno_scratch_ok (unsigned int regno)
{
  /* See s390_hard_regno_rename_ok.  */
  if (GENERAL_REGNO_P (regno)
      && !call_really_used_regs[regno]
      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Maximum number of registers to represent a value of mode MODE
   in a register of class RCLASS.  */

int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
  int reg_size;
  bool reg_pair_required_p = false;

  switch (rclass)
    {
    case FP_REGS:
    case VEC_REGS:
      reg_size = TARGET_VX ? 16 : 8;

      /* TF and TD modes would fit into a VR but we put them into a
	 register pair since we do not have 128bit FP instructions on
	 full VRs.  */
      if (TARGET_VX
	  && SCALAR_FLOAT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) >= 16)
	reg_pair_required_p = true;

      /* Even if complex types would fit into a single FPR/VR we force
	 them into a register pair to deal with the parts more easily.
	 (FIXME: What about complex ints?)  */
      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	reg_pair_required_p = true;
      break;
    case ACCESS_REGS:
      reg_size = 4;
      break;
    default:
      reg_size = UNITS_PER_WORD;
      break;
    }

  if (reg_pair_required_p)
    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}

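/* Examples: with the vector facility a TFmode value in FP_REGS takes
   2 * ((16/2 + 15) / 16) = 2 registers (an FP register pair), while
   V16QImode in VEC_REGS takes (16 + 15) / 16 = 1 vector register.  */
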
/* Return TRUE if changing mode from FROM to TO should not be allowed
   for register class CLASS.  */

bool
s390_cannot_change_mode_class (machine_mode from_mode,
			       machine_mode to_mode,
			       enum reg_class rclass)
{
  machine_mode small_mode;
  machine_mode big_mode;

  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
    return false;

  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
    {
      small_mode = from_mode;
      big_mode = to_mode;
    }
  else
    {
      small_mode = to_mode;
      big_mode = from_mode;
    }

  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in an VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also if the vector
     facility is available we still place TFmode values in VR register
     pairs, since the only instructions we have operating on TFmodes
     only deal with register pairs.  Therefore we have to allow DFmode
     subregs of TFmodes to enable the TFmode splitters.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && (GET_MODE_SIZE (small_mode) < 8
	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
    return true;

  /* Likewise for access registers, since they have only half the
     word size on 64-bit.  */
  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
    return true;

  return false;
}

/* Return true if we use LRA instead of reload pass.  */
static bool
s390_lra_p (void)
{
  return s390_lra_flag;
}

/* Return true if register FROM can be eliminated via register TO.  */

static bool
s390_can_eliminate (const int from, const int to)
{
  /* On zSeries machines, we have not marked the base register as fixed.
     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
     If a function requires the base register, we say here that this
     elimination cannot be performed.  This will cause reload to free
     up the base register (as if it were fixed).  On the other hand,
     if the current function does *not* require the base register, we
     say here the elimination succeeds, which in turn allows reload
     to allocate the base register for any other purpose.  */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      if (TARGET_CPU_ZARCH)
	{
	  s390_init_frame_layout ();
	  return cfun->machine->base_reg == NULL_RTX;
	}

      return false;
    }

  /* Everything else must point into the stack frame.  */
  gcc_assert (to == STACK_POINTER_REGNUM
	      || to == HARD_FRAME_POINTER_REGNUM);

  gcc_assert (from == FRAME_POINTER_REGNUM
	      || from == ARG_POINTER_REGNUM
	      || from == RETURN_ADDRESS_POINTER_REGNUM);

  /* Make sure we actually saved the return address.  */
  if (from == RETURN_ADDRESS_POINTER_REGNUM)
    if (!crtl->calls_eh_return
	&& !cfun->stdarg
	&& !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
}

/* Return offset between register FROM and TO initially after prolog.  */

HOST_WIDE_INT
s390_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  /* ??? Why are we called for non-eliminable pairs?  */
  if (!s390_can_eliminate (from, to))
    return 0;

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = (get_frame_size()
		+ STACK_POINTER_OFFSET
		+ crtl->outgoing_args_size);
      break;

    case ARG_POINTER_REGNUM:
      s390_init_frame_layout ();
      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      s390_init_frame_layout ();

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	{
	  /* If it turns out that for stdarg nothing went into the reg
	     save area we also do not need the return address
	     pointer.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    return 0;

	  gcc_unreachable ();
	}

      /* In order to make the following work it is not necessary for
	 r14 to have a save slot.  It is sufficient if one other GPR
	 got one.  Since the GPRs are always stored without gaps we
	 are able to calculate where the r14 save slot would
	 reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
		UNITS_PER_LONG);
      break;

    case BASE_REGNUM:
      offset = 0;
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}

/* Emit insn to save fpr REGNUM at offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
save_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));

  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
    set_mem_alias_set (addr, get_varargs_alias_set ());
  else
    set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
}

/* Emit insn to restore fpr REGNUM from offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
restore_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
  set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}

/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored in anyway.  */

static inline bool
global_not_special_regno_p (int regno)
{
  return (global_regs[regno]
	  /* These registers are special and need to be
	     restored in any case.  */
	  && !(regno == STACK_POINTER_REGNUM
	       || regno == RETURN_REGNUM
	       || regno == BASE_REGNUM
	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
}

/* Generate insn to save registers FIRST to LAST into
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
save_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn, note;
  int i;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);

  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
      else
	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));

      if (!global_not_special_regno_p (first))
	RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_store_multiple (addr,
			     gen_rtx_REG (Pmode, first),
			     GEN_INT (last - first + 1));

  if (first <= 6 && cfun->stdarg)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

	if (first + i <= 6)
	  set_mem_alias_set (mem, get_varargs_alias_set ());
      }

  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not set the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
	if (!global_not_special_regno_p (start))
	  break;

      if (start > last)
	return insn;

      addr = plus_constant (Pmode, base,
			    offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
	{
	  if (TARGET_64BIT)
	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  else
	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  note = PATTERN (note);

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  return insn;
	}

      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
				 gen_rtx_REG (Pmode, start),
				 GEN_INT (last - start + 1));
      note = PATTERN (note);

      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

      for (i = 0; i < XVECLEN (note, 0); i++)
	if (GET_CODE (XVECEXP (note, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return insn;
}

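/* For instance, saving r6-r15 in the default 64-bit layout yields a
   single "stmg %r6,%r15,48(%r15)"; the REG_FRAME_RELATED_EXPR note
   attached above then describes only the subset of that PARALLEL
   that matters for unwinding.  */
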
/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
			    addr,
			    GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}

/* Return insn sequence to load the GOT register.  */

static GTY(()) rtx got_symbol;

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  start_sequence ();

  if (TARGET_CPU_ZARCH)
    {
      emit_move_insn (got_rtx, got_symbol);
    }
  else
    {
      rtx offset;

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
			       UNSPEC_LTREL_OFFSET);
      offset = gen_rtx_CONST (Pmode, offset);
      offset = force_const_mem (Pmode, offset);

      emit_move_insn (got_rtx, offset);

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
			       UNSPEC_LTREL_BASE);
      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);

      emit_move_insn (got_rtx, offset);
    }

  insns = get_insns ();
  end_sequence ();
  return insns;
}

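/* On z/Architecture the sequence boils down to a single
   "larl %r12,_GLOBAL_OFFSET_TABLE_".  On ESA/390 the GOT offset is
   instead loaded from the literal pool and added to the pool base,
   which is what the UNSPEC_LTREL_OFFSET/UNSPEC_LTREL_BASE pair
   expresses.  */
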
/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));

  emit_insn (gen_stack_tie (mem));
}

/* Copy GPRS into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
	{
	  rtx_insn *insn =
	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
			    gen_rtx_REG (DImode, i));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  Doing
	     so it might emit def_cfa_register infos setting an FPR as
	     new CFA.  */
	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
	}
    }
}

/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
	continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
      if (i == STACK_POINTER_REGNUM)
	add_reg_note (insn, REG_CFA_DEF_CFA,
		      plus_constant (Pmode, stack_pointer_rtx,
				     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

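/* The restore direction uses lgdr, e.g. "lgdr %r11,%f0".  r15 goes
   through the dedicated stack_restore_from_fpr pattern plus a
   REG_CFA_DEF_CFA note so that the stack pointer switch stays
   visible to the CFI machinery.  */
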
/* A pass run immediately before shrink-wrapping and prologue and epilogue
   generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	annotate_constant_pool_refs (&PATTERN (insn));
	df_insn_rescan (insn);
      }

  return 0;
}

} // anon namespace

/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     TPF with profiling must avoid the register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else if (flag_split_stack && cfun->stdarg)
    temp_reg = gen_rtx_REG (Pmode, 12);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
			cfun_frame_layout.gprs_offset +
			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
					  - cfun_frame_layout.first_save_gpr_slot),
			cfun_frame_layout.first_save_gpr,
			cfun_frame_layout.last_save_gpr);
      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
	{
	  save_fpr (stack_pointer_rtx, offset, i);
	  offset += 8;
	}
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
	offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
	{
	  insn = save_fpr (stack_pointer_rtx, offset, i);
	  offset += 8;

	  /* If f4 and f6 are call clobbered they are saved due to
	     stdargs and therefore are not frame related.  */
	  if (!call_really_used_regs[i])
	    RTX_FRAME_RELATED_P (insn) = 1;
	}
      else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
	offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
		+ (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
	if (cfun_fpr_save_p (i))
	  {
	    insn = save_fpr (stack_pointer_rtx, offset, i);

	    RTX_FRAME_RELATED_P (insn) = 1;
	    offset -= 8;
	  }
      if (offset >= cfun_frame_layout.f8_offset)
	next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx real_frame_off;

      if (s390_stack_size)
	{
	  HOST_WIDE_INT stack_guard;

	  if (s390_stack_guard)
	    stack_guard = s390_stack_guard;
	  else
	    {
	      /* If no value for stack guard is provided the smallest power of 2
		 larger than the current frame size is chosen.  */
	      stack_guard = 1;
	      while (stack_guard < cfun_frame_layout.frame_size)
		stack_guard <<= 1;
	    }

	  if (cfun_frame_layout.frame_size >= s390_stack_size)
	    {
	      warning (0, "frame size of function %qs is %wd"
		       " bytes exceeding user provided stack limit of "
		       "%d bytes.  "
		       "An unconditional trap is added.",
		       current_function_name(), cfun_frame_layout.frame_size,
		       s390_stack_size);
	      emit_insn (gen_trap ());
	      emit_barrier ();
	    }
	  else
	    {
	      /* stack_guard has to be smaller than s390_stack_size.
		 Otherwise we would emit an AND with zero which would
		 not match the test under mask pattern.  */
	      if (stack_guard >= s390_stack_size)
		{
		  warning (0, "frame size of function %qs is %wd"
			   " bytes which is more than half the stack size. "
			   "The dynamic check would not be reliable. "
			   "No check emitted for this function.",
			   current_function_name(),
			   cfun_frame_layout.frame_size);
		}
	      else
		{
		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
						    & ~(stack_guard - 1));

		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
				       GEN_INT (stack_check_mask));
		  if (TARGET_64BIT)
		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
							 t, const0_rtx),
					     t, const0_rtx, const0_rtx));
		  else
		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
							 t, const0_rtx),
					     t, const0_rtx, const0_rtx));
		}
	    }
	}

      if (s390_warn_framesize > 0
	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
	warning (0, "frame size of %qs is %wd bytes",
		 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
	warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save incoming stack pointer into temp reg.  */
      if (TARGET_BACKCHAIN || next_fpr)
	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));

      /* Subtract frame size from stack pointer.  */

      if (DISP_IN_RANGE (INTVAL (frame_off)))
	{
	  insn = gen_rtx_SET (stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    frame_off));
	  insn = emit_insn (insn);
	}
      else
	{
	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
	    frame_off = force_const_mem (Pmode, frame_off);

	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
	  annotate_constant_pool_refs (&PATTERN (insn));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (stack_pointer_rtx,
				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					       real_frame_off)));

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
	{
	  if (cfun_frame_layout.backchain_offset)
	    addr = gen_rtx_MEM (Pmode,
				plus_constant (Pmode, stack_pointer_rtx,
					       cfun_frame_layout.backchain_offset));
	  else
	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_move_insn (addr, temp_reg));
	}

      /* If we support non-call exceptions (e.g. for Java),
	 we need to make sure the backchain pointer is set up
	 before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
	{
	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  emit_clobber (addr);
	}
    }

  /* Save fprs 8 - 15 (64 bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
	 we have to make sure that the stack pointer decrement is not
	 moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
				       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
	if (cfun_fpr_save_p (i))
	  {
	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
				      cfun_frame_layout.frame_size
				      + cfun_frame_layout.f8_offset
				      + offset);

	    insn = save_fpr (temp_reg, offset, i);
	    offset += 8;
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
				       gen_rtx_REG (DFmode, i)));
	  }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up got pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
	annotate_constant_pool_refs (&PATTERN (insn));

      emit_insn (insns);
    }

  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function
	 entry intercept to facilitate the use of tracing
	 algorithms located at the branch target.  */
      emit_insn (gen_prologue_tpf ());

      /* Emit a blockage here so that all code
	 lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
}

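/* A typical 64-bit prologue generated by the code above is roughly

       stmg %r6,%r15,48(%r15)   # save_gprs
       aghi %r15,-160           # decrement stack pointer

   plus "std" saves for call-saved FPRs, a backchain store when
   -mbackchain is in effect, and the literal pool / GOT setup.  */
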
/* Expand the epilogue into a bunch of separate insns.  */

void
s390_emit_epilogue (bool sibcall)
{
  rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
  int area_bottom, area_top, offset = 0;
  int next_offset;
  rtvec p;
  int i;

  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function
	 entry intercept to facilitate the use of tracing
	 algorithms located at the branch target.  */

      /* Emit a blockage here so that all code
	 lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());

      emit_insn (gen_epilogue_tpf ());
    }

  /* Check whether to use frame or stack pointer for restore.  */

  frame_pointer = (frame_pointer_needed
		   ? hard_frame_pointer_rtx : stack_pointer_rtx);

  s390_frame_area (&area_bottom, &area_top);

  /* Check whether we can access the register save area.
     If not, increment the frame pointer as required.  */

  if (area_top <= area_bottom)
    {
      /* Nothing to restore.  */
    }
  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
    {
      /* Area is in range.  */
      offset = cfun_frame_layout.frame_size;
    }
  else
    {
      rtx insn, frame_off, cfa;

      offset = area_bottom < 0 ? -area_bottom : 0;
      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);

      cfa = gen_rtx_SET (frame_pointer,
			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
      if (DISP_IN_RANGE (INTVAL (frame_off)))
	{
	  insn = gen_rtx_SET (frame_pointer,
			      gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
	  insn = emit_insn (insn);
	}
      else
	{
	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
	    frame_off = force_const_mem (Pmode, frame_off);

	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
	  annotate_constant_pool_refs (&PATTERN (insn));
	}
      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Restore call saved fprs.  */

  if (TARGET_64BIT)
    {
      if (cfun_save_high_fprs_p)
	{
	  next_offset = cfun_frame_layout.f8_offset;
	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
	    {
	      if (cfun_fpr_save_p (i))
		{
		  restore_fpr (frame_pointer,
			       offset + next_offset, i);
		  cfa_restores
		    = alloc_reg_note (REG_CFA_RESTORE,
				      gen_rtx_REG (DFmode, i), cfa_restores);
		  next_offset += 8;
		}
	    }
	}
    }
  else
    {
      next_offset = cfun_frame_layout.f4_offset;
      /* f4, f6 */
      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
	{
	  if (cfun_fpr_save_p (i))
	    {
	      restore_fpr (frame_pointer,
			   offset + next_offset, i);
	      cfa_restores
		= alloc_reg_note (REG_CFA_RESTORE,
				  gen_rtx_REG (DFmode, i), cfa_restores);
	      next_offset += 8;
	    }
	  else if (!TARGET_PACKED_STACK)
	    next_offset += 8;
	}
    }

  /* Return register.  */

  return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);

  /* Restore call saved gprs.  */

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      rtx insn, addr;
      int i;

      /* Check for global register and save them
	 to stack location from where they get restored.  */

      for (i = cfun_frame_layout.first_restore_gpr;
	   i <= cfun_frame_layout.last_restore_gpr;
	   i++)
	{
	  if (global_not_special_regno_p (i))
	    {
	      addr = plus_constant (Pmode, frame_pointer,
				    offset + cfun_frame_layout.gprs_offset
				    + (i - cfun_frame_layout.first_save_gpr_slot)
				    * UNITS_PER_LONG);
	      addr = gen_rtx_MEM (Pmode, addr);
	      set_mem_alias_set (addr, get_frame_alias_set ());
	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
	    }
	  else
	    cfa_restores
	      = alloc_reg_note (REG_CFA_RESTORE,
				gen_rtx_REG (Pmode, i), cfa_restores);
	}

      if (! sibcall)
	{
	  /* Fetch return address from stack before load multiple,
	     this will do good for scheduling.

	     Only do this if we already decided that r14 needs to be
	     saved to a stack slot.  (And not just because r14 happens to
	     be in between two GPRs which need saving.)  Otherwise it
	     would be difficult to take that decision back in
	     s390_optimize_prologue.  */
	  if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
	    {
	      int return_regnum = find_unused_clobbered_reg();
	      if (!return_regnum)
		return_regnum = 4;
	      return_reg = gen_rtx_REG (Pmode, return_regnum);

	      addr = plus_constant (Pmode, frame_pointer,
				    offset + cfun_frame_layout.gprs_offset
				    + (RETURN_REGNUM
				       - cfun_frame_layout.first_save_gpr_slot)
				    * UNITS_PER_LONG);
	      addr = gen_rtx_MEM (Pmode, addr);
	      set_mem_alias_set (addr, get_frame_alias_set ());
	      emit_move_insn (return_reg, addr);

	      /* Once we did that optimization we have to make sure
		 s390_optimize_prologue does not try to remove the
		 store of r14 since we will not be able to find the
		 load issued here.  */
	      cfun_frame_layout.save_return_addr_p = true;
	    }
	}

      insn = restore_gprs (frame_pointer,
			   offset + cfun_frame_layout.gprs_offset
			   + (cfun_frame_layout.first_restore_gpr
			      - cfun_frame_layout.first_save_gpr_slot)
			   * UNITS_PER_LONG,
			   cfun_frame_layout.first_restore_gpr,
			   cfun_frame_layout.last_restore_gpr);
      insn = emit_insn (insn);
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  s390_restore_gprs_from_fprs ();

  if (! sibcall)
    {
      /* Return to caller.  */

      p = rtvec_alloc (2);

      RTVEC_ELT (p, 0) = ret_rtx;
      RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
      emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
    }
}

/* Implement TARGET_SET_UP_BY_PROLOGUE.  */

static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
{
  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}

/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  */
static GTY(()) rtx morestack_ref;

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 1024

/* Emit -fsplit-stack prologue, which goes before the regular function
   prologue.  */

void
s390_expand_split_stack_prologue (void)
{
  rtx r1, guard, cc = NULL;
  rtx_insn *insn;
  /* Offset from thread pointer to __private_ss.  */
  int psso = TARGET_64BIT ? 0x38 : 0x20;
  /* Frame size and argument size - the two parameters to __morestack.  */
  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
  /* Align argument size to 8 bytes - simplifies __morestack code.  */
  HOST_WIDE_INT args_size = crtl->args.size >= 0
			    ? ((crtl->args.size + 7) & ~7)
			    : 0;
  /* Label to be called by __morestack.  */
  rtx_code_label *call_done = NULL;
  rtx_code_label *parm_base = NULL;
  rtx tmp;

  gcc_assert (flag_split_stack && reload_completed);
  if (!TARGET_CPU_ZARCH)
    {
      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
      return;
    }

  r1 = gen_rtx_REG (Pmode, 1);

  /* If no stack frame will be allocated, don't do anything.  */
  if (!frame_size)
    {
      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
	{
	  /* If va_start is used, just use r15.  */
	  emit_move_insn (r1,
			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					GEN_INT (STACK_POINTER_OFFSET)));
	}
      return;
    }

  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
					   | SYMBOL_FLAG_FUNCTION);
    }

  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
    {
      /* If frame_size will fit in an add instruction, do a stack space
	 check, and only call __morestack if there's not enough space.  */

      /* Get thread pointer.  r1 is the only register we can always destroy - r0
	 could contain a static chain (and cannot be used to address memory
	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
      emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
      /* Aim at __private_ss.  */
      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));

      /* If less than 1kiB used, skip addition and compare directly with
	 __private_ss.  */
      if (frame_size > SPLIT_STACK_AVAILABLE)
	{
	  emit_move_insn (r1, guard);
	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
	  else
	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
	  guard = r1;
	}

      /* Compare the (maybe adjusted) guard with the stack pointer.  */
      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
    }

  call_done = gen_label_rtx ();
  parm_base = gen_label_rtx ();

  /* Emit the parameter block.  */
  tmp = gen_split_stack_data (parm_base, call_done,
			      GEN_INT (frame_size),
			      GEN_INT (args_size));
  insn = emit_insn (tmp);
  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
  LABEL_NUSES (call_done)++;
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* %r1 = litbase.  */
  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* Now, we need to call __morestack.  It has very special calling
     conventions: it preserves param/return/static chain registers for
     calling main function body, and looks for its own parameters at %r1.  */

  if (cc != NULL)
    {
      tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);

      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = call_done;
      LABEL_NUSES (call_done)++;

      /* Mark the jump as very unlikely to be taken.  */
      add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);

      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
	{
	  /* If va_start is used, and __morestack was not called, just use
	     r15.  */
	  emit_move_insn (r1,
			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					GEN_INT (STACK_POINTER_OFFSET)));
	}
    }
  else
    {
      tmp = gen_split_stack_call (morestack_ref, call_done);
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = call_done;
      LABEL_NUSES (call_done)++;
      emit_barrier ();
    }

  /* __morestack will call us here.  */

  emit_label (call_done);
}

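/* Sketch of the emitted code for a small frame: load the thread
   pointer, fetch __private_ss from the TCB, compare it (possibly
   biased by frame_size) against %r15, and conditionally jump to the
   __morestack stub, which reads frame_size/args_size from the
   parameter block addressed by %r1 and returns to call_done.  */
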
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a register.  */

static void
s390_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, 1);
    }
}

/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}

/* Return true if the epilogue is guaranteed to contain only a return
   instruction and if a direct return can therefore be used instead.
   One of the main advantages of using direct return instructions
   is that we can then use conditional returns.  */

bool
s390_can_use_return_insn (void)
{
  int i;

  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;

  /* For 31 bit this is not covered by the frame_size check below
     since f4, f6 are saved in the register save area without needing
     additional stack space.  */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
}

/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */
static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return ((TARGET_VX_ABI
	   && typelist == 0
	   && VECTOR_TYPE_P (TREE_TYPE (val))
	   && (funcdecl == NULL_TREE
	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
	  ? N_("vector argument passed to unprototyped function")
	  : NULL);
}

/* Return the size in bytes of a function argument of
   type TYPE and/or mode MODE.  At least one of TYPE or
   MODE must be specified.  */

static int
s390_function_arg_size (machine_mode mode, const_tree type)
{
  if (type)
    return int_size_in_bytes (type);

  /* No type info available for some library calls ...  */
  if (mode != BLKmode)
    return GET_MODE_SIZE (mode);

  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
}

11479 /* Return true if a function argument of type TYPE and mode MODE
11480 is to be passed in a vector register, if available. */
11483 s390_function_arg_vector (machine_mode mode, const_tree type)
11485 if (!TARGET_VX_ABI)
11488 if (s390_function_arg_size (mode, type) > 16)
11491 /* No type info available for some library calls ... */
11493 return VECTOR_MODE_P (mode);
11495 /* The ABI says that record types with a single member are treated
11496 just like that member would be. */
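/* Illustrative example (a sketch, not part of the compiler proper):
   under the single-member rule above, a hypothetical wrapper such as

     struct wrap { __vector int v; };

   is passed exactly like a plain __vector int would be; the loop
   below peels off such single-field records one level at a time.  */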
11497 while (TREE_CODE (type) == RECORD_TYPE)
11499 tree field, single = NULL_TREE;
11501 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11503 if (TREE_CODE (field) != FIELD_DECL)
11506 if (single == NULL_TREE)
11507 single = TREE_TYPE (field);
11512 if (single == NULL_TREE)
11516 /* If the field declaration adds extra bytes due to
11517 e.g. padding, this is not accepted as a vector type. */
11518 if (int_size_in_bytes (single) <= 0
11519 || int_size_in_bytes (single) != int_size_in_bytes (type))
11525 return VECTOR_TYPE_P (type);
11528 /* Return true if a function argument of type TYPE and mode MODE
11529 is to be passed in a floating-point register, if available. */
11532 s390_function_arg_float (machine_mode mode, const_tree type)
11534 if (s390_function_arg_size (mode, type) > 8)
11537 /* Soft-float changes the ABI: no floating-point registers are used. */
11538 if (TARGET_SOFT_FLOAT)
11541 /* No type info available for some library calls ... */
11543 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11545 /* The ABI says that record types with a single member are treated
11546 just like that member would be. */
11547 while (TREE_CODE (type) == RECORD_TYPE)
11549 tree field, single = NULL_TREE;
11551 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11553 if (TREE_CODE (field) != FIELD_DECL)
11556 if (single == NULL_TREE)
11557 single = TREE_TYPE (field);
11562 if (single == NULL_TREE)
11568 return TREE_CODE (type) == REAL_TYPE;
11571 /* Return true if a function argument of type TYPE and mode MODE
11572 is to be passed in an integer register, or a pair of integer
11573 registers, if available. */
11576 s390_function_arg_integer (machine_mode mode, const_tree type)
11578 int size = s390_function_arg_size (mode, type);
11582 /* No type info available for some library calls ... */
11584 return GET_MODE_CLASS (mode) == MODE_INT
11585 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11587 /* We accept small integral (and similar) types. */
11588 if (INTEGRAL_TYPE_P (type)
11589 || POINTER_TYPE_P (type)
11590 || TREE_CODE (type) == NULLPTR_TYPE
11591 || TREE_CODE (type) == OFFSET_TYPE
11592 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11595 /* We also accept structs of size 1, 2, 4, 8 that are not
11596 passed in floating-point registers. */
11597 if (AGGREGATE_TYPE_P (type)
11598 && exact_log2 (size) >= 0
11599 && !s390_function_arg_float (mode, type))
11605 /* Return 1 if a function argument of type TYPE and mode MODE
11606 is to be passed by reference. The ABI specifies that only
11607 structures of size 1, 2, 4, or 8 bytes are passed by value,
11608 all other structures (and complex numbers) are passed by
11609 reference. */
11612 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11613 machine_mode mode, const_tree type,
11614 bool named ATTRIBUTE_UNUSED)
11616 int size = s390_function_arg_size (mode, type);
11618 if (s390_function_arg_vector (mode, type))
11626 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11629 if (TREE_CODE (type) == COMPLEX_TYPE
11630 || TREE_CODE (type) == VECTOR_TYPE)
11637 /* Update the data in CUM to advance over an argument of mode MODE and
11638 data type TYPE. (TYPE is null for libcalls where that information
11639 may not be available.) The boolean NAMED specifies whether the
11640 argument is a named argument (as opposed to an unnamed argument
11641 matching an ellipsis). */
11644 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11645 const_tree type, bool named)
11647 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11649 if (s390_function_arg_vector (mode, type))
11651 /* We are called for unnamed vector stdarg arguments which are
11652 passed on the stack. In this case this hook does not have to
11653 do anything since stack arguments are tracked by common
11654 code. */
11659 else if (s390_function_arg_float (mode, type))
11663 else if (s390_function_arg_integer (mode, type))
11665 int size = s390_function_arg_size (mode, type);
11666 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11669 gcc_unreachable ();
11672 /* Define where to put the arguments to a function.
11673 Value is zero to push the argument on the stack,
11674 or a hard register in which to store the argument.
11676 MODE is the argument's machine mode.
11677 TYPE is the data type of the argument (as a tree).
11678 This is null for libcalls where that information may
11679 not be available.
11680 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11681 the preceding args and about the function being called.
11682 NAMED is nonzero if this argument is a named parameter
11683 (otherwise it is an extra parameter matching an ellipsis).
11685 On S/390, we use general purpose registers 2 through 6 to
11686 pass integer, pointer, and certain structure arguments, and
11687 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11688 to pass floating point arguments. All remaining arguments
11689 are pushed to the stack. */
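/* Illustrative mapping (a sketch derived from the conventions above,
   assuming the 64-bit ABI; the function and its types are made up):

     void f (int a, double b, long c, struct big { char x[16]; } d);

   Here a is passed in %r2, b in %f0, c in %r3, and d, whose size is
   not 1, 2, 4, or 8 bytes, is passed by reference with its address
   in %r4.  */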
11692 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11693 const_tree type, bool named)
11695 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11698 s390_check_type_for_vector_abi (type, true, false);
11700 if (s390_function_arg_vector (mode, type))
11702 /* Vector arguments being part of the ellipsis are passed on the
11703 stack. */
11704 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11707 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11709 else if (s390_function_arg_float (mode, type))
11711 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11714 return gen_rtx_REG (mode, cum->fprs + 16);
11716 else if (s390_function_arg_integer (mode, type))
11718 int size = s390_function_arg_size (mode, type);
11719 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11721 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11723 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11724 return gen_rtx_REG (mode, cum->gprs + 2);
11725 else if (n_gprs == 2)
11727 rtvec p = rtvec_alloc (2);
11730 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11733 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11736 return gen_rtx_PARALLEL (mode, p);
11740 /* After the real arguments, expand_call calls us once again
11741 with a void_type_node type. Whatever we return here is
11742 passed as operand 2 to the call expanders.
11744 We don't need this feature ... */
11745 else if (type == void_type_node)
11748 gcc_unreachable ();
11751 /* Return true if return values of type TYPE should be returned
11752 in a memory buffer whose address is passed by the caller as
11753 hidden first argument. */
11756 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11758 /* We accept small integral (and similar) types. */
11759 if (INTEGRAL_TYPE_P (type)
11760 || POINTER_TYPE_P (type)
11761 || TREE_CODE (type) == OFFSET_TYPE
11762 || TREE_CODE (type) == REAL_TYPE)
11763 return int_size_in_bytes (type) > 8;
11765 /* vector types which fit into a VR. */
11767 && VECTOR_TYPE_P (type)
11768 && int_size_in_bytes (type) <= 16)
11771 /* Aggregates and similar constructs are always returned
11773 if (AGGREGATE_TYPE_P (type)
11774 || TREE_CODE (type) == COMPLEX_TYPE
11775 || VECTOR_TYPE_P (type))
11778 /* ??? We get called on all sorts of random stuff from
11779 aggregate_value_p. We can't abort, but it's not clear
11780 what's safe to return. Pretend it's a struct I guess. */
11784 /* Function arguments and return values are promoted to word size. */
11786 static machine_mode
11787 s390_promote_function_mode (const_tree type, machine_mode mode,
11789 const_tree fntype ATTRIBUTE_UNUSED,
11790 int for_return ATTRIBUTE_UNUSED)
11792 if (INTEGRAL_MODE_P (mode)
11793 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11795 if (type != NULL_TREE && POINTER_TYPE_P (type))
11796 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11803 /* Define where to return a (scalar) value of type RET_TYPE.
11804 If RET_TYPE is null, define where to return a (scalar)
11805 value of mode MODE from a libcall. */
11808 s390_function_and_libcall_value (machine_mode mode,
11809 const_tree ret_type,
11810 const_tree fntype_or_decl,
11811 bool outgoing ATTRIBUTE_UNUSED)
11813 /* For vector return types it is important to use the RET_TYPE
11814 argument whenever available since the middle-end might have
11815 changed the mode to a scalar mode. */
11816 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11817 || (!ret_type && VECTOR_MODE_P (mode)));
11819 /* For normal functions perform the promotion as
11820 promote_function_mode would do. */
11823 int unsignedp = TYPE_UNSIGNED (ret_type);
11824 mode = promote_function_mode (ret_type, mode, &unsignedp,
11825 fntype_or_decl, 1);
11828 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11829 || SCALAR_FLOAT_MODE_P (mode)
11830 || (TARGET_VX_ABI && vector_ret_type_p));
11831 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11833 if (TARGET_VX_ABI && vector_ret_type_p)
11834 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11835 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11836 return gen_rtx_REG (mode, 16);
11837 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11838 || UNITS_PER_LONG == UNITS_PER_WORD)
11839 return gen_rtx_REG (mode, 2);
11840 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11842 /* This case is triggered when returning a 64 bit value with
11843 -m31 -mzarch. Although the value would fit into a single
11844 register it has to be forced into a 32 bit register pair in
11845 order to match the ABI. */
11846 rtvec p = rtvec_alloc (2);
11849 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11851 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11853 return gen_rtx_PARALLEL (mode, p);
11856 gcc_unreachable ();
11859 /* Define where to return a scalar return value of type RET_TYPE. */
11862 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11865 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11866 fn_decl_or_type, outgoing);
11869 /* Define where to return a scalar libcall return value of mode
11873 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11875 return s390_function_and_libcall_value (mode, NULL_TREE,
11880 /* Create and return the va_list datatype.
11882 On S/390, va_list is an array type equivalent to
11884 typedef struct __va_list_tag
11885 {
11886 long __gpr;
11887 long __fpr;
11888 void *__overflow_arg_area;
11889 void *__reg_save_area;
11890 } va_list[1];
11892 where __gpr and __fpr hold the number of general purpose
11893 or floating point arguments used up to now, respectively,
11894 __overflow_arg_area points to the stack location of the
11895 next argument passed on the stack, and __reg_save_area
11896 always points to the start of the register area in the
11897 call frame of the current function. The function prologue
11898 saves all registers used for argument passing into this
11899 area if the function uses variable arguments. */
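/* Usage sketch (ordinary user code, shown only to illustrate how the
   pieces built below fit together):

     #include <stdarg.h>

     long sum (int n, ...)
     {
       va_list ap;               - lowers to the array type built here
       long s = 0;
       va_start (ap, n);         - expanded by s390_va_start below
       while (n--)
         s += va_arg (ap, long); - expanded by s390_gimplify_va_arg
       va_end (ap);
       return s;
     }
*/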
11902 s390_build_builtin_va_list (void)
11904 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11906 record = lang_hooks.types.make_type (RECORD_TYPE);
11909 build_decl (BUILTINS_LOCATION,
11910 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11912 f_gpr = build_decl (BUILTINS_LOCATION,
11913 FIELD_DECL, get_identifier ("__gpr"),
11914 long_integer_type_node);
11915 f_fpr = build_decl (BUILTINS_LOCATION,
11916 FIELD_DECL, get_identifier ("__fpr"),
11917 long_integer_type_node);
11918 f_ovf = build_decl (BUILTINS_LOCATION,
11919 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11921 f_sav = build_decl (BUILTINS_LOCATION,
11922 FIELD_DECL, get_identifier ("__reg_save_area"),
11925 va_list_gpr_counter_field = f_gpr;
11926 va_list_fpr_counter_field = f_fpr;
11928 DECL_FIELD_CONTEXT (f_gpr) = record;
11929 DECL_FIELD_CONTEXT (f_fpr) = record;
11930 DECL_FIELD_CONTEXT (f_ovf) = record;
11931 DECL_FIELD_CONTEXT (f_sav) = record;
11933 TYPE_STUB_DECL (record) = type_decl;
11934 TYPE_NAME (record) = type_decl;
11935 TYPE_FIELDS (record) = f_gpr;
11936 DECL_CHAIN (f_gpr) = f_fpr;
11937 DECL_CHAIN (f_fpr) = f_ovf;
11938 DECL_CHAIN (f_ovf) = f_sav;
11940 layout_type (record);
11942 /* The correct type is an array type of one element. */
11943 return build_array_type (record, build_index_type (size_zero_node));
11946 /* Implement va_start by filling the va_list structure VALIST.
11947 STDARG_P is always true, and ignored.
11948 NEXTARG points to the first anonymous stack argument.
11950 The following global variables are used to initialize
11951 the va_list structure:
11953 crtl->args.info:
11954 holds number of gprs and fprs used for named arguments.
11955 crtl->args.arg_offset_rtx:
11956 holds the offset of the first anonymous stack argument
11957 (relative to the virtual arg pointer). */
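/* Worked example (an assumption-laden sketch; hard float and the
   64-bit ABI assumed): for

     void f (int a, double b, ...)

   the named arguments consume one GPR and one FPR, so at the
   va_start point crtl->args.info.gprs == 1 and
   crtl->args.info.fprs == 1, and exactly these counts end up in the
   __gpr and __fpr fields.  */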
11960 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11962 HOST_WIDE_INT n_gpr, n_fpr;
11964 tree f_gpr, f_fpr, f_ovf, f_sav;
11965 tree gpr, fpr, ovf, sav, t;
11967 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11968 f_fpr = DECL_CHAIN (f_gpr);
11969 f_ovf = DECL_CHAIN (f_fpr);
11970 f_sav = DECL_CHAIN (f_ovf);
11972 valist = build_simple_mem_ref (valist);
11973 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11974 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11975 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11976 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11978 /* Count number of gp and fp argument registers used. */
11980 n_gpr = crtl->args.info.gprs;
11981 n_fpr = crtl->args.info.fprs;
11983 if (cfun->va_list_gpr_size)
11985 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11986 build_int_cst (NULL_TREE, n_gpr));
11987 TREE_SIDE_EFFECTS (t) = 1;
11988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11991 if (cfun->va_list_fpr_size)
11993 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11994 build_int_cst (NULL_TREE, n_fpr));
11995 TREE_SIDE_EFFECTS (t) = 1;
11996 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11999 if (flag_split_stack
12000 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12002 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12007 reg = gen_reg_rtx (Pmode);
12008 cfun->machine->split_stack_varargs_pointer = reg;
12011 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12012 seq = get_insns ();
12015 push_topmost_sequence ();
12016 emit_insn_after (seq, entry_of_function ());
12017 pop_topmost_sequence ();
12020 /* Find the overflow area.
12021 FIXME: This currently is too pessimistic when the vector ABI is
12022 enabled. In that case we *always* set up the overflow area
12023 pointer. */
12024 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12025 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12028 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12029 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12031 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12033 off = INTVAL (crtl->args.arg_offset_rtx);
12034 off = off < 0 ? 0 : off;
12035 if (TARGET_DEBUG_ARG)
12036 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12037 (int)n_gpr, (int)n_fpr, off);
12039 t = fold_build_pointer_plus_hwi (t, off);
12041 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12042 TREE_SIDE_EFFECTS (t) = 1;
12043 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12046 /* Find the register save area. */
12047 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12048 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12050 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12051 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12053 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12054 TREE_SIDE_EFFECTS (t) = 1;
12055 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12059 /* Implement va_arg by updating the va_list structure
12060 VALIST as required to retrieve an argument of type
12061 TYPE, and returning that argument.
12063 Generates code equivalent to:
12065 if (integral value) {
12066 if (size <= 4 && args.gpr < 5 ||
12067 size > 4 && args.gpr < 4 )
12068 ret = args.reg_save_area[args.gpr+8]
12069 else
12070 ret = *args.overflow_arg_area++;
12071 } else if (vector value) {
12072 ret = *args.overflow_arg_area;
12073 args.overflow_arg_area += size / 8;
12074 } else if (float value) {
12075 if (args.fpr < 2)
12076 ret = args.reg_save_area[args.fpr+64]
12077 else
12078 ret = *args.overflow_arg_area++;
12079 } else if (aggregate value) {
12080 if (args.gpr < 5)
12081 ret = *args.reg_save_area[args.gpr]
12082 else
12083 ret = **args.overflow_arg_area++;
12084 } */
12087 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12088 gimple_seq *post_p ATTRIBUTE_UNUSED)
12090 tree f_gpr, f_fpr, f_ovf, f_sav;
12091 tree gpr, fpr, ovf, sav, reg, t, u;
12092 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12093 tree lab_false, lab_over = NULL_TREE;
12094 tree addr = create_tmp_var (ptr_type_node, "addr");
12095 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12096 a stack slot. */
12098 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12099 f_fpr = DECL_CHAIN (f_gpr);
12100 f_ovf = DECL_CHAIN (f_fpr);
12101 f_sav = DECL_CHAIN (f_ovf);
12103 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12104 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12105 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12107 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12108 both appear on a lhs. */
12109 valist = unshare_expr (valist);
12110 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12112 size = int_size_in_bytes (type);
12114 s390_check_type_for_vector_abi (type, true, false);
12116 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12118 if (TARGET_DEBUG_ARG)
12120 fprintf (stderr, "va_arg: aggregate type");
12124 /* Aggregates are passed by reference. */
12129 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12130 will be added by s390_frame_info because for va_args an even
12131 number of gprs always has to be saved (r15-r2 = 14 regs). */
12132 sav_ofs = 2 * UNITS_PER_LONG;
12133 sav_scale = UNITS_PER_LONG;
12134 size = UNITS_PER_LONG;
12135 max_reg = GP_ARG_NUM_REG - n_reg;
12136 left_align_p = false;
12138 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12140 if (TARGET_DEBUG_ARG)
12142 fprintf (stderr, "va_arg: vector type");
12152 left_align_p = true;
12154 else if (s390_function_arg_float (TYPE_MODE (type), type))
12156 if (TARGET_DEBUG_ARG)
12158 fprintf (stderr, "va_arg: float type");
12162 /* FP args go in FP registers, if present. */
12166 sav_ofs = 16 * UNITS_PER_LONG;
12168 max_reg = FP_ARG_NUM_REG - n_reg;
12169 left_align_p = false;
12173 if (TARGET_DEBUG_ARG)
12175 fprintf (stderr, "va_arg: other type");
12179 /* Otherwise into GP registers. */
12182 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12184 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12185 will be added by s390_frame_info because for va_args an even
12186 number of gprs always has to be saved (r15-r2 = 14 regs). */
12187 sav_ofs = 2 * UNITS_PER_LONG;
12189 if (size < UNITS_PER_LONG)
12190 sav_ofs += UNITS_PER_LONG - size;
12192 sav_scale = UNITS_PER_LONG;
12193 max_reg = GP_ARG_NUM_REG - n_reg;
12194 left_align_p = false;
12197 /* Pull the value out of the saved registers ... */
12199 if (reg != NULL_TREE)
12202 /* if (reg > ((typeof (reg))max_reg))
12203 goto lab_false;
12205 addr = sav + sav_ofs + reg * sav_scale;
12207 goto lab_over;
12209 lab_false: */
12212 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12213 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12215 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12216 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12217 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12218 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12219 gimplify_and_add (t, pre_p);
12221 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12222 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12223 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12224 t = fold_build_pointer_plus (t, u);
12226 gimplify_assign (addr, t, pre_p);
12228 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12230 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12233 /* ... Otherwise out of the overflow area. */
12236 if (size < UNITS_PER_LONG && !left_align_p)
12237 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12239 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12241 gimplify_assign (addr, t, pre_p);
12243 if (size < UNITS_PER_LONG && left_align_p)
12244 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12246 t = fold_build_pointer_plus_hwi (t, size);
12248 gimplify_assign (ovf, t, pre_p);
12250 if (reg != NULL_TREE)
12251 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12254 /* Increment register save count. */
12258 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12259 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12260 gimplify_and_add (u, pre_p);
12265 t = build_pointer_type_for_mode (build_pointer_type (type),
12267 addr = fold_convert (t, addr);
12268 addr = build_va_arg_indirect_ref (addr);
12272 t = build_pointer_type_for_mode (type, ptr_mode, true);
12273 addr = fold_convert (t, addr);
12276 return build_va_arg_indirect_ref (addr);
12279 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12280 instruction.
12281 DEST - Register location where CC will be stored.
12282 TDB - Pointer to a 256 byte area where to store the transaction
12283 diagnostic block. NULL if TDB is not needed.
12284 RETRY - Retry count value. If non-NULL a retry loop for CC2
12285 failures is generated.
12286 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12287 of the tbegin instruction pattern. */
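/* Caller-side sketch (user code, not part of this file; the constant
   name is taken from s390's htmintrin.h and is an assumption here):

     if (__builtin_tbegin ((void *) 0) == _HTM_TBEGIN_STARTED)
       {
         ... transactional code ...
         __builtin_tend ();
       }

   The CC value moved into DEST below is what the builtin returns.  */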
12290 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12292 rtx retry_plus_two = gen_reg_rtx (SImode);
12293 rtx retry_reg = gen_reg_rtx (SImode);
12294 rtx_code_label *retry_label = NULL;
12296 if (retry != NULL_RTX)
12298 emit_move_insn (retry_reg, retry);
12299 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12300 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12301 retry_label = gen_label_rtx ();
12302 emit_label (retry_label);
12305 if (clobber_fprs_p)
12308 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12311 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12315 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12318 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12319 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12321 UNSPEC_CC_TO_INT));
12322 if (retry != NULL_RTX)
12324 const int CC0 = 1 << 3;
12325 const int CC1 = 1 << 2;
12326 const int CC3 = 1 << 0;
12328 rtx count = gen_reg_rtx (SImode);
12329 rtx_code_label *leave_label = gen_label_rtx ();
12331 /* Exit for success and permanent failures. */
12332 jump = s390_emit_jump (leave_label,
12333 gen_rtx_EQ (VOIDmode,
12334 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12335 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12336 LABEL_NUSES (leave_label) = 1;
12338 /* CC2 - transient failure. Perform retry with ppa. */
12339 emit_move_insn (count, retry_plus_two);
12340 emit_insn (gen_subsi3 (count, count, retry_reg));
12341 emit_insn (gen_tx_assist (count));
12342 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12345 JUMP_LABEL (jump) = retry_label;
12346 LABEL_NUSES (retry_label) = 1;
12347 emit_label (leave_label);
12352 /* Return the decl for the target specific builtin with the function
12353 code FCODE. */
12356 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12358 if (fcode >= S390_BUILTIN_MAX)
12359 return error_mark_node;
12361 return s390_builtin_decls[fcode];
12364 /* We call mcount before the function prologue. So a profiled leaf
12365 function should stay a leaf function. */
12368 s390_keep_leaf_when_profiled ()
12373 /* Output assembly code for the trampoline template to
12374 stdio stream FILE.
12376 On S/390, we use gpr 1 internally in the trampoline code;
12377 gpr 0 is used to hold the static chain. */
12380 s390_asm_trampoline_template (FILE *file)
12383 op[0] = gen_rtx_REG (Pmode, 0);
12384 op[1] = gen_rtx_REG (Pmode, 1);
12388 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12389 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12390 output_asm_insn ("br\t%1", op); /* 2 byte */
12391 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12395 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12396 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12397 output_asm_insn ("br\t%1", op); /* 2 byte */
12398 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12402 /* Emit RTL insns to initialize the variable parts of a trampoline.
12403 FNADDR is an RTX for the address of the function's pure code.
12404 CXT is an RTX for the static chain value for the function. */
12407 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12409 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12412 emit_block_move (m_tramp, assemble_trampoline_template (),
12413 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12415 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12416 emit_move_insn (mem, cxt);
12417 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12418 emit_move_insn (mem, fnaddr);
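/* Resulting 64-bit trampoline layout, sketched from the template and
   the two stores just emitted (byte offsets on the left):

      0: basr %r1,0            - %r1 <- address of the next insn
      2: lmg  %r0,%r1,14(%r1)  - loads the two doublewords at 16/24
      8: br   %r1              - jump to the target function
     16: <static chain value>  - written by the first store above
     24: <function address>    - written by the second store above

   The 31-bit variant uses lm and 4-byte slots at offsets 8 and 12.  */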
12421 /* Output assembler code to FILE to increment profiler label # LABELNO
12422 for profiling a function entry. */
12425 s390_function_profiler (FILE *file, int labelno)
12430 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12432 fprintf (file, "# function profiler \n");
12434 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12435 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12436 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12438 op[2] = gen_rtx_REG (Pmode, 1);
12439 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12440 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12442 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12445 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12446 op[4] = gen_rtx_CONST (Pmode, op[4]);
12451 output_asm_insn ("stg\t%0,%1", op);
12452 output_asm_insn ("larl\t%2,%3", op);
12453 output_asm_insn ("brasl\t%0,%4", op);
12454 output_asm_insn ("lg\t%0,%1", op);
12456 else if (TARGET_CPU_ZARCH)
12458 output_asm_insn ("st\t%0,%1", op);
12459 output_asm_insn ("larl\t%2,%3", op);
12460 output_asm_insn ("brasl\t%0,%4", op);
12461 output_asm_insn ("l\t%0,%1", op);
12463 else if (!flag_pic)
12465 op[6] = gen_label_rtx ();
12467 output_asm_insn ("st\t%0,%1", op);
12468 output_asm_insn ("bras\t%2,%l6", op);
12469 output_asm_insn (".long\t%4", op);
12470 output_asm_insn (".long\t%3", op);
12471 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12472 output_asm_insn ("l\t%0,0(%2)", op);
12473 output_asm_insn ("l\t%2,4(%2)", op);
12474 output_asm_insn ("basr\t%0,%0", op);
12475 output_asm_insn ("l\t%0,%1", op);
12479 op[5] = gen_label_rtx ();
12480 op[6] = gen_label_rtx ();
12482 output_asm_insn ("st\t%0,%1", op);
12483 output_asm_insn ("bras\t%2,%l6", op);
12484 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12485 output_asm_insn (".long\t%4-%l5", op);
12486 output_asm_insn (".long\t%3-%l5", op);
12487 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12488 output_asm_insn ("lr\t%0,%2", op);
12489 output_asm_insn ("a\t%0,0(%2)", op);
12490 output_asm_insn ("a\t%2,4(%2)", op);
12491 output_asm_insn ("basr\t%0,%0", op);
12492 output_asm_insn ("l\t%0,%1", op);
12496 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12497 into its SYMBOL_REF_FLAGS. */
12500 s390_encode_section_info (tree decl, rtx rtl, int first)
12502 default_encode_section_info (decl, rtl, first);
12504 if (TREE_CODE (decl) == VAR_DECL)
12506 /* Store the alignment to be able to check if we can use
12507 a larl/load-relative instruction. We only handle the cases
12508 that can go wrong (i.e. no FUNC_DECLs). */
12509 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12510 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12511 else if (DECL_ALIGN (decl) % 32)
12512 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12513 else if (DECL_ALIGN (decl) % 64)
12514 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12517 /* Literal pool references don't have a decl so they are handled
12518 differently here. We rely on the information in the MEM_ALIGN
12519 entry to decide upon the alignment. */
12521 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12522 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12524 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12525 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12526 else if (MEM_ALIGN (rtl) % 32)
12527 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12528 else if (MEM_ALIGN (rtl) % 64)
12529 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12533 /* Output thunk to FILE that implements a C++ virtual function call (with
12534 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12535 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12536 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12537 relative to the resulting this pointer. */
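/* Conceptual C equivalent of the thunk emitted below (sketch only;
   upper-case names are placeholders for the DELTA, VCALL_OFFSET and
   FUNCTION operands):

     ret_type thunk (void *this_ptr, ...)
     {
       this_ptr = (char *) this_ptr + DELTA;
       if (VCALL_OFFSET != 0)
         this_ptr = (char *) this_ptr
                    + *(long *) (*(char **) this_ptr + VCALL_OFFSET);
       return FUNCTION (this_ptr, ...);
     }
*/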
12540 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12541 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12547 /* Make sure unwind info is emitted for the thunk if needed. */
12548 final_start_function (emit_barrier (), file, 1);
12550 /* Operand 0 is the target function. */
12551 op[0] = XEXP (DECL_RTL (function), 0);
12552 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12555 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12556 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12557 op[0] = gen_rtx_CONST (Pmode, op[0]);
12560 /* Operand 1 is the 'this' pointer. */
12561 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12562 op[1] = gen_rtx_REG (Pmode, 3);
12564 op[1] = gen_rtx_REG (Pmode, 2);
12566 /* Operand 2 is the delta. */
12567 op[2] = GEN_INT (delta);
12569 /* Operand 3 is the vcall_offset. */
12570 op[3] = GEN_INT (vcall_offset);
12572 /* Operand 4 is the temporary register. */
12573 op[4] = gen_rtx_REG (Pmode, 1);
12575 /* Operands 5 to 8 can be used as labels. */
12581 /* Operand 9 can be used as a temporary register. */
12584 /* Generate code. */
12587 /* Setup literal pool pointer if required. */
12588 if ((!DISP_IN_RANGE (delta)
12589 && !CONST_OK_FOR_K (delta)
12590 && !CONST_OK_FOR_Os (delta))
12591 || (!DISP_IN_RANGE (vcall_offset)
12592 && !CONST_OK_FOR_K (vcall_offset)
12593 && !CONST_OK_FOR_Os (vcall_offset)))
12595 op[5] = gen_label_rtx ();
12596 output_asm_insn ("larl\t%4,%5", op);
12599 /* Add DELTA to this pointer. */
12602 if (CONST_OK_FOR_J (delta))
12603 output_asm_insn ("la\t%1,%2(%1)", op);
12604 else if (DISP_IN_RANGE (delta))
12605 output_asm_insn ("lay\t%1,%2(%1)", op);
12606 else if (CONST_OK_FOR_K (delta))
12607 output_asm_insn ("aghi\t%1,%2", op);
12608 else if (CONST_OK_FOR_Os (delta))
12609 output_asm_insn ("agfi\t%1,%2", op);
12612 op[6] = gen_label_rtx ();
12613 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12617 /* Perform vcall adjustment. */
12620 if (DISP_IN_RANGE (vcall_offset))
12622 output_asm_insn ("lg\t%4,0(%1)", op);
12623 output_asm_insn ("ag\t%1,%3(%4)", op);
12625 else if (CONST_OK_FOR_K (vcall_offset))
12627 output_asm_insn ("lghi\t%4,%3", op);
12628 output_asm_insn ("ag\t%4,0(%1)", op);
12629 output_asm_insn ("ag\t%1,0(%4)", op);
12631 else if (CONST_OK_FOR_Os (vcall_offset))
12633 output_asm_insn ("lgfi\t%4,%3", op);
12634 output_asm_insn ("ag\t%4,0(%1)", op);
12635 output_asm_insn ("ag\t%1,0(%4)", op);
12639 op[7] = gen_label_rtx ();
12640 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12641 output_asm_insn ("ag\t%4,0(%1)", op);
12642 output_asm_insn ("ag\t%1,0(%4)", op);
12646 /* Jump to target. */
12647 output_asm_insn ("jg\t%0", op);
12649 /* Output literal pool if required. */
12652 output_asm_insn (".align\t4", op);
12653 targetm.asm_out.internal_label (file, "L",
12654 CODE_LABEL_NUMBER (op[5]));
12658 targetm.asm_out.internal_label (file, "L",
12659 CODE_LABEL_NUMBER (op[6]));
12660 output_asm_insn (".long\t%2", op);
12664 targetm.asm_out.internal_label (file, "L",
12665 CODE_LABEL_NUMBER (op[7]));
12666 output_asm_insn (".long\t%3", op);
12671 /* Setup base pointer if required. */
12673 || (!DISP_IN_RANGE (delta)
12674 && !CONST_OK_FOR_K (delta)
12675 && !CONST_OK_FOR_Os (delta))
12676 || (!DISP_IN_RANGE (vcall_offset)
12677 && !CONST_OK_FOR_K (vcall_offset)
12678 && !CONST_OK_FOR_Os (vcall_offset)))
12680 op[5] = gen_label_rtx ();
12681 output_asm_insn ("basr\t%4,0", op);
12682 targetm.asm_out.internal_label (file, "L",
12683 CODE_LABEL_NUMBER (op[5]));
12686 /* Add DELTA to this pointer. */
12689 if (CONST_OK_FOR_J (delta))
12690 output_asm_insn ("la\t%1,%2(%1)", op);
12691 else if (DISP_IN_RANGE (delta))
12692 output_asm_insn ("lay\t%1,%2(%1)", op);
12693 else if (CONST_OK_FOR_K (delta))
12694 output_asm_insn ("ahi\t%1,%2", op);
12695 else if (CONST_OK_FOR_Os (delta))
12696 output_asm_insn ("afi\t%1,%2", op);
12699 op[6] = gen_label_rtx ();
12700 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12704 /* Perform vcall adjustment. */
12707 if (CONST_OK_FOR_J (vcall_offset))
12709 output_asm_insn ("l\t%4,0(%1)", op);
12710 output_asm_insn ("a\t%1,%3(%4)", op);
12712 else if (DISP_IN_RANGE (vcall_offset))
12714 output_asm_insn ("l\t%4,0(%1)", op);
12715 output_asm_insn ("ay\t%1,%3(%4)", op);
12717 else if (CONST_OK_FOR_K (vcall_offset))
12719 output_asm_insn ("lhi\t%4,%3", op);
12720 output_asm_insn ("a\t%4,0(%1)", op);
12721 output_asm_insn ("a\t%1,0(%4)", op);
12723 else if (CONST_OK_FOR_Os (vcall_offset))
12725 output_asm_insn ("iilf\t%4,%3", op);
12726 output_asm_insn ("a\t%4,0(%1)", op);
12727 output_asm_insn ("a\t%1,0(%4)", op);
12731 op[7] = gen_label_rtx ();
12732 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12733 output_asm_insn ("a\t%4,0(%1)", op);
12734 output_asm_insn ("a\t%1,0(%4)", op);
12737 /* We had to clobber the base pointer register.
12738 Re-setup the base pointer (with a different base). */
12739 op[5] = gen_label_rtx ();
12740 output_asm_insn ("basr\t%4,0", op);
12741 targetm.asm_out.internal_label (file, "L",
12742 CODE_LABEL_NUMBER (op[5]));
12745 /* Jump to target. */
12746 op[8] = gen_label_rtx ();
12749 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12750 else if (!nonlocal)
12751 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12752 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12753 else if (flag_pic == 1)
12755 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12756 output_asm_insn ("l\t%4,%0(%4)", op);
12758 else if (flag_pic == 2)
12760 op[9] = gen_rtx_REG (Pmode, 0);
12761 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12762 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12763 output_asm_insn ("ar\t%4,%9", op);
12764 output_asm_insn ("l\t%4,0(%4)", op);
12767 output_asm_insn ("br\t%4", op);
12769 /* Output literal pool. */
12770 output_asm_insn (".align\t4", op);
12772 if (nonlocal && flag_pic == 2)
12773 output_asm_insn (".long\t%0", op);
12776 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12777 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12780 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12782 output_asm_insn (".long\t%0", op);
12784 output_asm_insn (".long\t%0-%5", op);
12788 targetm.asm_out.internal_label (file, "L",
12789 CODE_LABEL_NUMBER (op[6]));
12790 output_asm_insn (".long\t%2", op);
12794 targetm.asm_out.internal_label (file, "L",
12795 CODE_LABEL_NUMBER (op[7]));
12796 output_asm_insn (".long\t%3", op);
12799 final_end_function ();
12803 s390_valid_pointer_mode (machine_mode mode)
12805 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12808 /* Checks whether the given CALL_EXPR would use a caller
12809 saved register. This is used to decide whether sibling call
12810 optimization could be performed on the respective function
12814 s390_call_saved_register_used (tree call_expr)
12816 CUMULATIVE_ARGS cum_v;
12817 cumulative_args_t cum;
12824 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12825 cum = pack_cumulative_args (&cum_v);
12827 for (i = 0; i < call_expr_nargs (call_expr); i++)
12829 parameter = CALL_EXPR_ARG (call_expr, i);
12830 gcc_assert (parameter);
12832 /* For an undeclared variable passed as parameter we will get
12833 an ERROR_MARK node here. */
12834 if (TREE_CODE (parameter) == ERROR_MARK)
12837 type = TREE_TYPE (parameter);
12840 mode = TYPE_MODE (type);
12843 /* We assume that in the target function all parameters are
12844 named. This only has an impact on vector argument register
12845 usage, none of which is call-saved. */
12846 if (pass_by_reference (&cum_v, mode, type, true))
12849 type = build_pointer_type (type);
12852 parm_rtx = s390_function_arg (cum, mode, type, true);
12854 s390_function_arg_advance (cum, mode, type, true);
12859 if (REG_P (parm_rtx))
12862 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12864 if (!call_used_regs[reg + REGNO (parm_rtx)])
12868 if (GET_CODE (parm_rtx) == PARALLEL)
12872 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12874 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12876 gcc_assert (REG_P (r));
12879 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12881 if (!call_used_regs[reg + REGNO (r)])
12890 /* Return true if the given call expression can be
12891 turned into a sibling call.
12892 DECL holds the declaration of the function to be called whereas
12893 EXP is the call expression itself. */
12896 s390_function_ok_for_sibcall (tree decl, tree exp)
12898 /* The TPF epilogue uses register 1. */
12899 if (TARGET_TPF_PROFILING)
12902 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12903 which would have to be restored before the sibcall. */
12904 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12907 /* Register 6 on s390 is available as an argument register but unfortunately
12908 "caller saved". This makes functions needing this register for arguments
12909 not suitable for sibcalls. */
12910 return !s390_call_saved_register_used (exp);
12913 /* Return the fixed registers used for condition codes. */
12916 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12919 *p2 = INVALID_REGNUM;
12924 /* This function is used by the call expanders of the machine description.
12925 It emits the call insn itself together with the necessary operations
12926 to adjust the target address and returns the emitted insn.
12927 ADDR_LOCATION is the target address rtx
12928 TLS_CALL the location of the thread-local symbol
12929 RESULT_REG the register where the result of the call should be stored
12930 RETADDR_REG the register where the return address should be stored
12931 If this parameter is NULL_RTX the call is considered
12932 to be a sibling call. */
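/* Shape sketch of the RTL constructed below for a normal call that
   returns a value (simplified; register numbers illustrative):

     (parallel [(set (reg 2) (call (mem:QI addr) (const_int 0)))
                (clobber (reg 14))])

   For a sibling call (RETADDR_REG == NULL_RTX) the clobber is
   omitted, and for TLS calls a (use tls_call) element is added.  */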
12935 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12938 bool plt_call = false;
12944 /* Direct function calls need special treatment. */
12945 if (GET_CODE (addr_location) == SYMBOL_REF)
12947 /* When calling a global routine in PIC mode, we must
12948 replace the symbol itself with the PLT stub. */
12949 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12951 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12953 addr_location = gen_rtx_UNSPEC (Pmode,
12954 gen_rtvec (1, addr_location),
12956 addr_location = gen_rtx_CONST (Pmode, addr_location);
12960 /* For -fpic code the PLT entries might use r12 which is
12961 call-saved. Therefore we cannot do a sibcall when
12962 calling directly using a symbol ref. When reaching
12963 this point we decided (in s390_function_ok_for_sibcall)
12964 to do a sibcall for a function pointer but one of the
12965 optimizers was able to get rid of the function pointer
12966 by propagating the symbol ref into the call. This
12967 optimization is illegal for S/390 so we turn the direct
12968 call into an indirect call again. */
12969 addr_location = force_reg (Pmode, addr_location);
12972 /* Unless we can use the bras(l) insn, force the
12973 routine address into a register. */
12974 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12977 addr_location = legitimize_pic_address (addr_location, 0);
12979 addr_location = force_reg (Pmode, addr_location);
12983 /* If it is already an indirect call or the code above moved the
12984 SYMBOL_REF to somewhere else, make sure the address can be found in
12985 register 1. */
12986 if (retaddr_reg == NULL_RTX
12987 && GET_CODE (addr_location) != SYMBOL_REF
12990 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12991 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12994 addr_location = gen_rtx_MEM (QImode, addr_location);
12995 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12997 if (result_reg != NULL_RTX)
12998 call = gen_rtx_SET (result_reg, call);
13000 if (retaddr_reg != NULL_RTX)
13002 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13004 if (tls_call != NULL_RTX)
13005 vec = gen_rtvec (3, call, clobber,
13006 gen_rtx_USE (VOIDmode, tls_call));
13008 vec = gen_rtvec (2, call, clobber);
13010 call = gen_rtx_PARALLEL (VOIDmode, vec);
13013 insn = emit_call_insn (call);
13015 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13016 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13018 /* s390_function_ok_for_sibcall should
13019 have denied sibcalls in this case. */
13020 gcc_assert (retaddr_reg != NULL_RTX);
13021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13026 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13029 s390_conditional_register_usage (void)
13035 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13036 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13038 if (TARGET_CPU_ZARCH)
13040 fixed_regs[BASE_REGNUM] = 0;
13041 call_used_regs[BASE_REGNUM] = 0;
13042 fixed_regs[RETURN_REGNUM] = 0;
13043 call_used_regs[RETURN_REGNUM] = 0;
13047 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13048 call_used_regs[i] = call_really_used_regs[i] = 0;
13052 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13053 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13056 if (TARGET_SOFT_FLOAT)
13058 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13059 call_used_regs[i] = fixed_regs[i] = 1;
13062 /* Disable v16 - v31 for non-vector target. */
13065 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13066 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13070 /* Corresponding function to eh_return expander. */
13072 static GTY(()) rtx s390_tpf_eh_return_symbol;
13074 s390_emit_tpf_eh_return (rtx target)
13079 if (!s390_tpf_eh_return_symbol)
13080 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13082 reg = gen_rtx_REG (Pmode, 2);
13083 orig_ra = gen_rtx_REG (Pmode, 3);
13085 emit_move_insn (reg, target);
13086 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13087 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13088 gen_rtx_REG (Pmode, RETURN_REGNUM));
13089 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13090 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13092 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13095 /* Rework the prologue/epilogue to avoid saving/restoring
13096 registers unnecessarily. */
13099 s390_optimize_prologue (void)
13101 rtx_insn *insn, *new_insn, *next_insn;
13103 /* Do a final recompute of the frame-related data. */
13104 s390_optimize_register_info ();
13106 /* If all special registers are in fact used, there's nothing we
13107 can do, so no point in walking the insn list. */
13109 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13110 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13111 && (TARGET_CPU_ZARCH
13112 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13113 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13116 /* Search for prologue/epilogue insns and replace them. */
13118 for (insn = get_insns (); insn; insn = next_insn)
13120 int first, last, off;
13121 rtx set, base, offset;
13124 next_insn = NEXT_INSN (insn);
13126 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13129 pat = PATTERN (insn);
13131 /* Remove ldgr/lgdr instructions used for saving and restoring
13132 GPRs if possible. */
13137 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13138 tmp_pat = XVECEXP (pat, 0, 0);
13140 if (GET_CODE (tmp_pat) == SET
13141 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13142 && REG_P (SET_SRC (tmp_pat))
13143 && REG_P (SET_DEST (tmp_pat)))
13145 int src_regno = REGNO (SET_SRC (tmp_pat));
13146 int dest_regno = REGNO (SET_DEST (tmp_pat));
13150 if (!((GENERAL_REGNO_P (src_regno)
13151 && FP_REGNO_P (dest_regno))
13152 || (FP_REGNO_P (src_regno)
13153 && GENERAL_REGNO_P (dest_regno))))
13156 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13157 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13159 /* GPR must be call-saved, FPR must be call-clobbered. */
13160 if (!call_really_used_regs[fpr_regno]
13161 || call_really_used_regs[gpr_regno])
13164 /* It must not happen that what we once saved in an FPR now
13165 needs a stack slot. */
13166 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13168 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13170 remove_insn (insn);
13176 if (GET_CODE (pat) == PARALLEL
13177 && store_multiple_operation (pat, VOIDmode))
13179 set = XVECEXP (pat, 0, 0);
13180 first = REGNO (SET_SRC (set));
13181 last = first + XVECLEN (pat, 0) - 1;
13182 offset = const0_rtx;
13183 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13184 off = INTVAL (offset);
13186 if (GET_CODE (base) != REG || off < 0)
13188 if (cfun_frame_layout.first_save_gpr != -1
13189 && (cfun_frame_layout.first_save_gpr < first
13190 || cfun_frame_layout.last_save_gpr > last))
13192 if (REGNO (base) != STACK_POINTER_REGNUM
13193 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13195 if (first > BASE_REGNUM || last < BASE_REGNUM)
13198 if (cfun_frame_layout.first_save_gpr != -1)
13200 rtx s_pat = save_gprs (base,
13201 off + (cfun_frame_layout.first_save_gpr
13202 - first) * UNITS_PER_LONG,
13203 cfun_frame_layout.first_save_gpr,
13204 cfun_frame_layout.last_save_gpr);
13205 new_insn = emit_insn_before (s_pat, insn);
13206 INSN_ADDRESSES_NEW (new_insn, -1);
13209 remove_insn (insn);
13213 if (cfun_frame_layout.first_save_gpr == -1
13214 && GET_CODE (pat) == SET
13215 && GENERAL_REG_P (SET_SRC (pat))
13216 && GET_CODE (SET_DEST (pat)) == MEM)
13219 first = REGNO (SET_SRC (set));
13220 offset = const0_rtx;
13221 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13222 off = INTVAL (offset);
13224 if (GET_CODE (base) != REG || off < 0)
13226 if (REGNO (base) != STACK_POINTER_REGNUM
13227 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13230 remove_insn (insn);
13234 if (GET_CODE (pat) == PARALLEL
13235 && load_multiple_operation (pat, VOIDmode))
13237 set = XVECEXP (pat, 0, 0);
13238 first = REGNO (SET_DEST (set));
13239 last = first + XVECLEN (pat, 0) - 1;
13240 offset = const0_rtx;
13241 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13242 off = INTVAL (offset);
13244 if (GET_CODE (base) != REG || off < 0)
13247 if (cfun_frame_layout.first_restore_gpr != -1
13248 && (cfun_frame_layout.first_restore_gpr < first
13249 || cfun_frame_layout.last_restore_gpr > last))
13251 if (REGNO (base) != STACK_POINTER_REGNUM
13252 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13254 if (first > BASE_REGNUM || last < BASE_REGNUM)
13257 if (cfun_frame_layout.first_restore_gpr != -1)
13259 rtx rpat = restore_gprs (base,
13260 off + (cfun_frame_layout.first_restore_gpr
13261 - first) * UNITS_PER_LONG,
13262 cfun_frame_layout.first_restore_gpr,
13263 cfun_frame_layout.last_restore_gpr);
13265 /* Remove REG_CFA_RESTOREs for registers that we no
13266 longer need to save. */
13267 REG_NOTES (rpat) = REG_NOTES (insn);
13268 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13269 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13270 && ((int) REGNO (XEXP (*ptr, 0))
13271 < cfun_frame_layout.first_restore_gpr))
13272 *ptr = XEXP (*ptr, 1);
13274 ptr = &XEXP (*ptr, 1);
13275 new_insn = emit_insn_before (rpat, insn);
13276 RTX_FRAME_RELATED_P (new_insn) = 1;
13277 INSN_ADDRESSES_NEW (new_insn, -1);
13280 remove_insn (insn);
13284 if (cfun_frame_layout.first_restore_gpr == -1
13285 && GET_CODE (pat) == SET
13286 && GENERAL_REG_P (SET_DEST (pat))
13287 && GET_CODE (SET_SRC (pat)) == MEM)
13290 first = REGNO (SET_DEST (set));
13291 offset = const0_rtx;
13292 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13293 off = INTVAL (offset);
13295 if (GET_CODE (base) != REG || off < 0)
13298 if (REGNO (base) != STACK_POINTER_REGNUM
13299 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13302 remove_insn (insn);
13308 /* On z10 and later the dynamic branch prediction must see the
13309 backward jump within a certain window. If not, it falls back to
13310 the static prediction. This function rearranges the loop backward
13311 branch in a way which makes the static prediction always correct.
13312 The function returns true if it added an instruction. */
13314 s390_fix_long_loop_prediction (rtx_insn *insn)
13316 rtx set = single_set (insn);
13317 rtx code_label, label_ref;
13318 rtx_insn *uncond_jump;
13319 rtx_insn *cur_insn;
13323 /* This will exclude branch on count and branch on index patterns
13324 since these are correctly statically predicted. */
13326 || SET_DEST (set) != pc_rtx
13327 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13330 /* Skip conditional returns. */
13331 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13332 && XEXP (SET_SRC (set), 2) == pc_rtx)
13335 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13336 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13338 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13340 code_label = XEXP (label_ref, 0);
13342 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13343 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13344 || (INSN_ADDRESSES (INSN_UID (insn))
13345 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13348 for (distance = 0, cur_insn = PREV_INSN (insn);
13349 distance < PREDICT_DISTANCE - 6;
13350 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13351 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13354 rtx_code_label *new_label = gen_label_rtx ();
13355 uncond_jump = emit_jump_insn_after (
13356 gen_rtx_SET (pc_rtx,
13357 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13359 emit_label_after (new_label, uncond_jump);
13361 tmp = XEXP (SET_SRC (set), 1);
13362 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13363 XEXP (SET_SRC (set), 2) = tmp;
13364 INSN_CODE (insn) = -1;
13366 XEXP (label_ref, 0) = new_label;
13367 JUMP_LABEL (insn) = new_label;
13368 JUMP_LABEL (uncond_jump) = code_label;
13373 /* Returns 1 if INSN reads the value of REG for purposes not related
13374 to addressing of memory, and 0 otherwise. */
13376 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13378 return reg_referenced_p (reg, PATTERN (insn))
13379 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13382 /* Starting from INSN find_cond_jump looks downwards in the insn
13383 stream for a single jump insn which is the last user of the
13384 condition code set in INSN. */
13386 find_cond_jump (rtx_insn *insn)
13388 for (; insn; insn = NEXT_INSN (insn))
13392 if (LABEL_P (insn))
13395 if (!JUMP_P (insn))
13397 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13402 /* This will be triggered by a return. */
13403 if (GET_CODE (PATTERN (insn)) != SET)
13406 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13407 ite = SET_SRC (PATTERN (insn));
13409 if (GET_CODE (ite) != IF_THEN_ELSE)
13412 cc = XEXP (XEXP (ite, 0), 0);
13413 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13416 if (find_reg_note (insn, REG_DEAD, cc))
13424 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13425 the semantics does not change. If NULL_RTX is passed as COND the
13426 function tries to find the conditional jump starting with INSN. */
13428 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13432 if (cond == NULL_RTX)
13434 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13435 rtx set = jump ? single_set (jump) : NULL_RTX;
13437 if (set == NULL_RTX)
13440 cond = XEXP (SET_SRC (set), 0);
13445 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13448 /* On z10, instructions of the compare-and-branch family have the
13449 property to access the register occurring as second operand with
13450 its bits complemented. If such a compare is grouped with a second
13451 instruction that accesses the same register non-complemented, and
13452 if that register's value is delivered via a bypass, then the
13453 pipeline recycles, thereby causing significant performance decline.
13454 This function locates such situations and exchanges the two
13455 operands of the compare. The function returns true whenever it
13456 added an insn. */
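/* Hazard sketch (the assembly is illustrative only):

     lr   %r0,%r1           - accesses %r1 non-complemented
     crj  %r2,%r1,8,label   - compare-and-branch accesses its second
                              operand %r1 with complemented bits

   If %r1 arrives via a bypass and both insns land in one group, the
   pipeline recycles.  The code below either swaps the compare
   operands (adjusting the condition) or emits a nop to split the
   group.  */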
13458 s390_z10_optimize_cmp (rtx_insn *insn)
13460 rtx_insn *prev_insn, *next_insn;
13461 bool insn_added_p = false;
13462 rtx cond, *op0, *op1;
13464 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13466 /* Handle compare and branch and branch on count
13468 rtx pattern = single_set (insn);
13471 || SET_DEST (pattern) != pc_rtx
13472 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13475 cond = XEXP (SET_SRC (pattern), 0);
13476 op0 = &XEXP (cond, 0);
13477 op1 = &XEXP (cond, 1);
13479 else if (GET_CODE (PATTERN (insn)) == SET)
13483 /* Handle normal compare instructions. */
13484 src = SET_SRC (PATTERN (insn));
13485 dest = SET_DEST (PATTERN (insn));
13488 || !CC_REGNO_P (REGNO (dest))
13489 || GET_CODE (src) != COMPARE)
13492 /* s390_swap_cmp will try to find the conditional
13493 jump when passing NULL_RTX as condition. */
13495 op0 = &XEXP (src, 0);
13496 op1 = &XEXP (src, 1);
13501 if (!REG_P (*op0) || !REG_P (*op1))
13504 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13507 /* Swap the COMPARE arguments and its mask if there is a
13508 conflicting access in the previous insn. */
13509 prev_insn = prev_active_insn (insn);
13510 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13511 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13512 s390_swap_cmp (cond, op0, op1, insn);
13514 /* Check if there is a conflict with the next insn. If there
13515 was no conflict with the previous insn, then swap the
13516 COMPARE arguments and its mask. If we already swapped
13517 the operands, or if swapping them would cause a conflict
13518 with the previous insn, issue a NOP after the COMPARE in
13519 order to separate the two instructions. */
13520 next_insn = next_active_insn (insn);
13521 if (next_insn != NULL_RTX && INSN_P (next_insn)
13522 && s390_non_addr_reg_read_p (*op1, next_insn))
13524 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13525 && s390_non_addr_reg_read_p (*op0, prev_insn))
13527 if (REGNO (*op1) == 0)
13528 emit_insn_after (gen_nop1 (), insn);
13530 emit_insn_after (gen_nop (), insn);
13531 insn_added_p = true;
13534 s390_swap_cmp (cond, op0, op1, insn);
13536 return insn_added_p;
13539 /* Number of INSNs to be scanned backward in the last BB of the loop
13540 and forward in the first BB of the loop. This usually should be a
13541 bit more than the number of INSNs which could go into one
13542 group. */
13543 #define S390_OSC_SCAN_INSN_NUM 5
13545 /* Scan LOOP for static OSC collisions and return true if an osc_break
13546 should be issued for this loop. */
13548 s390_adjust_loop_scan_osc (struct loop* loop)
13551 HARD_REG_SET modregs, newregs;
13552 rtx_insn *insn, *store_insn = NULL;
13554 struct s390_address addr_store, addr_load;
13555 subrtx_iterator::array_type array;
13558 CLEAR_HARD_REG_SET (modregs);
13561 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13563 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13567 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13570 find_all_hard_reg_sets (insn, &newregs, true);
13571 IOR_HARD_REG_SET (modregs, newregs);
13573 set = single_set (insn);
13577 if (MEM_P (SET_DEST (set))
13578 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13585 if (store_insn == NULL_RTX)
13589 FOR_BB_INSNS (loop->header, insn)
13591 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13594 if (insn == store_insn)
13598 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13601 find_all_hard_reg_sets (insn, &newregs, true);
13602 IOR_HARD_REG_SET (modregs, newregs);
13604 set = single_set (insn);
13608 /* An intermediate store disrupts static OSC checking
13609 anyway. */
13610 if (MEM_P (SET_DEST (set))
13611 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13614 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13616 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13617 && rtx_equal_p (addr_load.base, addr_store.base)
13618 && rtx_equal_p (addr_load.indx, addr_store.indx)
13619 && rtx_equal_p (addr_load.disp, addr_store.disp))
13621 if ((addr_load.base != NULL_RTX
13622 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
13623 || (addr_load.indx != NULL_RTX
13624 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
/* Look for adjustments which can be done on simple innermost loops. */
13634 s390_adjust_loops ()
13636 struct loop *loop = NULL;
13639 compute_bb_for_insn ();
13641 /* Find the loops. */
13642 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
13644 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
13648 flow_loop_dump (loop, dump_file, NULL, 0);
13649 fprintf (dump_file, ";; OSC loop scan Loop: ");
13651 if (loop->latch == NULL
13652 || pc_set (BB_END (loop->latch)) == NULL_RTX
13653 || !s390_adjust_loop_scan_osc (loop))
13657 if (loop->latch == NULL)
13658 fprintf (dump_file, " muliple backward jumps\n");
13661 fprintf (dump_file, " header insn: %d latch insn: %d ",
13662 INSN_UID (BB_HEAD (loop->header)),
13663 INSN_UID (BB_END (loop->latch)));
13664 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
13665 fprintf (dump_file, " loop does not end with jump\n");
13667 fprintf (dump_file, " not instrumented\n");
13673 rtx_insn *new_insn;
13676 fprintf (dump_file, " adding OSC break insn: ");
13677 new_insn = emit_insn_before (gen_osc_break (),
13678 BB_END (loop->latch));
13679 INSN_ADDRESSES_NEW (new_insn, -1);
13683 loop_optimizer_finalize ();
13685 df_finish_pass (false);
13688 /* Perform machine-dependent processing. */
13693 bool pool_overflow = false;
13694 int hw_before, hw_after;
13696 if (s390_tune == PROCESSOR_2964_Z13)
13697 s390_adjust_loops ();
13699 /* Make sure all splits have been performed; splits after
13700 machine_dependent_reorg might confuse insn length counts. */
13701 split_all_insns_noflow ();
13703 /* Install the main literal pool and the associated base
13704 register load insns.
In addition, there are two problematic situations we need to correct:
13709 - the literal pool might be > 4096 bytes in size, so that
13710 some of its elements cannot be directly accessed
13712 - a branch target might be > 64K away from the branch, so that
13713 it is not possible to use a PC-relative instruction.
13715 To fix those, we split the single literal pool into multiple
13716 pool chunks, reloading the pool base register at various
13717 points throughout the function to ensure it always points to
13718 the pool chunk the following code expects, and / or replace
13719 PC-relative branches by absolute branches.
13721 However, the two problems are interdependent: splitting the
13722 literal pool can move a branch further away from its target,
13723 causing the 64K limit to overflow, and on the other hand,
13724 replacing a PC-relative branch by an absolute branch means
13725 we need to put the branch target address into the literal
13726 pool, possibly causing it to overflow.
13728 So, we loop trying to fix up both problems until we manage
13729 to satisfy both conditions at the same time. Note that the
13730 loop is guaranteed to terminate as every pass of the loop
13731 strictly decreases the total number of PC-relative branches
13732 in the function. (This is not completely true as there
13733 might be branch-over-pool insns introduced by chunkify_start.
13734 Those never need to be split however.) */
13738 struct constant_pool *pool = NULL;
13740 /* Collect the literal pool. */
13741 if (!pool_overflow)
13743 pool = s390_mainpool_start ();
13745 pool_overflow = true;
13748 /* If literal pool overflowed, start to chunkify it. */
13750 pool = s390_chunkify_start ();
13752 /* Split out-of-range branches. If this has created new
13753 literal pool entries, cancel current chunk list and
13754 recompute it. zSeries machines have large branch
13755 instructions, so we never need to split a branch. */
13756 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13759 s390_chunkify_cancel (pool);
13761 s390_mainpool_cancel (pool);
13766 /* If we made it up to here, both conditions are satisfied.
13767 Finish up literal pool related changes. */
13769 s390_chunkify_finish (pool);
13771 s390_mainpool_finish (pool);
13773 /* We're done splitting branches. */
13774 cfun->machine->split_branches_pending_p = false;
13778 /* Generate out-of-pool execute target insns. */
13779 if (TARGET_CPU_ZARCH)
13781 rtx_insn *insn, *target;
13784 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13786 label = s390_execute_label (insn);
13790 gcc_assert (label != const0_rtx);
13792 target = emit_label (XEXP (label, 0));
13793 INSN_ADDRESSES_NEW (target, -1);
13795 target = emit_insn (s390_execute_target (insn));
13796 INSN_ADDRESSES_NEW (target, -1);
13800 /* Try to optimize prologue and epilogue further. */
13801 s390_optimize_prologue ();
13803 /* Walk over the insns and do some >=z10 specific changes. */
13804 if (s390_tune >= PROCESSOR_2097_Z10)
13807 bool insn_added_p = false;
13809 /* The insn lengths and addresses have to be up to date for the
13810 following manipulations. */
13811 shorten_branches (get_insns ());
13813 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13815 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13819 insn_added_p |= s390_fix_long_loop_prediction (insn);
13821 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13822 || GET_CODE (PATTERN (insn)) == SET)
13823 && s390_tune == PROCESSOR_2097_Z10)
13824 insn_added_p |= s390_z10_optimize_cmp (insn);
13827 /* Adjust branches if we added new instructions. */
13829 shorten_branches (get_insns ());
13832 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13837 /* Insert NOPs for hotpatching. */
13838 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13840 1. inside the area covered by debug information to allow setting
13841 breakpoints at the NOPs,
13842 2. before any insn which results in an asm instruction,
13843 3. before in-function labels to avoid jumping to the NOPs, for
13844 example as part of a loop,
13845 4. before any barrier in case the function is completely empty
13846 (__builtin_unreachable ()) and has neither internal labels nor
13849 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13851 /* Output a series of NOPs before the first active insn. */
13852 while (insn && hw_after > 0)
13854 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13856 emit_insn_before (gen_nop_6_byte (), insn);
13859 else if (hw_after >= 2)
13861 emit_insn_before (gen_nop_4_byte (), insn);
13866 emit_insn_before (gen_nop_2_byte (), insn);
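/* Worked example (editorial note, not in the original sources):
   hw_after counts halfwords of NOPs still to be emitted. Assuming each
   emitted NOP consumes its own size in halfwords, hw_after == 5 on
   z/Arch yields one 6-byte NOP (3 halfwords) followed by one 4-byte
   NOP (2 halfwords). */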
/* Return true if INSN is an fp load insn writing register REGNO. */
13875 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13878 enum attr_type flag = s390_safe_attr_type (insn);
13880 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13883 set = single_set (insn);
13885 if (set == NULL_RTX)
13888 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13891 if (REGNO (SET_DEST (set)) != regno)
13897 /* This value describes the distance to be avoided between an
arithmetic fp instruction and an fp load writing the same register.
Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
fine, but the exact value has to be avoided. Otherwise the FP
13901 pipeline will throw an exception causing a major penalty. */
13902 #define Z10_EARLYLOAD_DISTANCE 7
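/* Editorial example: an fp load writing the same register exactly 7
   insns after the arithmetic fp instruction triggers the penalty,
   while distances of 6 or 8 insns are fine; only the exact distance
   has to be broken up by the scheduler. */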
13904 /* Rearrange the ready list in order to avoid the situation described
13905 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13906 moved to the very end of the ready list. */
13908 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13910 unsigned int regno;
13911 int nready = *nready_p;
13916 enum attr_type flag;
13919 /* Skip DISTANCE - 1 active insns. */
13920 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13921 distance > 0 && insn != NULL_RTX;
13922 distance--, insn = prev_active_insn (insn))
13923 if (CALL_P (insn) || JUMP_P (insn))
13926 if (insn == NULL_RTX)
13929 set = single_set (insn);
13931 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13932 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13935 flag = s390_safe_attr_type (insn);
13937 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13940 regno = REGNO (SET_DEST (set));
13943 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13950 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
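/* Editorial illustration (assumption based on the surrounding code):
   GCC issues from the back of the ready list, i.e. ready[*nready_p - 1]
   is taken first. Shifting ready[0..i-1] up by one slot frees ready[0]
   for the problematic load, deferring it as long as possible. */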
13955 /* The s390_sched_state variable tracks the state of the current or
13956 the last instruction group.
13958 0,1,2 number of instructions scheduled in the current group
13959 3 the last group is complete - normal insns
13960 4 the last group was a cracked/expanded insn */
13962 static int s390_sched_state;
13964 #define S390_SCHED_STATE_NORMAL 3
13965 #define S390_SCHED_STATE_CRACKED 4
13967 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
13968 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
13969 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
13970 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
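/* Editorial example: an insn that is both cracked and ends a group
   carries S390_SCHED_ATTR_MASK_CRACKED | S390_SCHED_ATTR_MASK_ENDGROUP,
   i.e. 0x1 | 0x4 == 0x5. */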
13972 static unsigned int
13973 s390_get_sched_attrmask (rtx_insn *insn)
13975 unsigned int mask = 0;
13979 case PROCESSOR_2827_ZEC12:
13980 if (get_attr_zEC12_cracked (insn))
13981 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13982 if (get_attr_zEC12_expanded (insn))
13983 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13984 if (get_attr_zEC12_endgroup (insn))
13985 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13986 if (get_attr_zEC12_groupalone (insn))
13987 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13989 case PROCESSOR_2964_Z13:
13990 if (get_attr_z13_cracked (insn))
13991 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13992 if (get_attr_z13_expanded (insn))
13993 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13994 if (get_attr_z13_endgroup (insn))
13995 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13996 if (get_attr_z13_groupalone (insn))
13997 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14000 gcc_unreachable ();
14005 static unsigned int
14006 s390_get_unit_mask (rtx_insn *insn, int *units)
14008 unsigned int mask = 0;
14012 case PROCESSOR_2964_Z13:
14014 if (get_attr_z13_unit_lsu (insn))
14016 if (get_attr_z13_unit_fxu (insn))
14018 if (get_attr_z13_unit_vfu (insn))
14022 gcc_unreachable ();
14027 /* Return the scheduling score for INSN. The higher the score the
14028 better. The score is calculated from the OOO scheduling attributes
14029 of INSN and the scheduling state s390_sched_state. */
14031 s390_sched_score (rtx_insn *insn)
14033 unsigned int mask = s390_get_sched_attrmask (insn);
14036 switch (s390_sched_state)
/* Try to put insns into the first slot which would otherwise break a group. */
14041 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14042 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14044 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
/* Prefer not cracked insns while trying to put together a group. */
14050 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14051 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14052 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14054 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
/* Prefer not cracked insns while trying to put together a group. */
14060 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14061 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14062 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14064 /* Prefer endgroup insns in the last slot. */
14065 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14068 case S390_SCHED_STATE_NORMAL:
14069 /* Prefer not cracked insns if the last was not cracked. */
14070 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14071 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14073 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14076 case S390_SCHED_STATE_CRACKED:
14077 /* Try to keep cracked insns together to prevent them from
14078 interrupting groups. */
14079 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14080 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14085 if (s390_tune == PROCESSOR_2964_Z13)
14088 unsigned unit_mask, m = 1;
14090 unit_mask = s390_get_unit_mask (insn, &units);
14091 gcc_assert (units <= MAX_SCHED_UNITS);
/* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
ago the last insn of this unit type got scheduled. This is
supposed to help provide a proper instruction mix to the CPU. */
14097 for (i = 0; i < units; i++, m <<= 1)
14099 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14100 MAX_SCHED_MIX_DISTANCE);
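/* Editorial sketch of the bonus above: a unit last used
   MAX_SCHED_MIX_DISTANCE insns ago contributes the full
   MAX_SCHED_MIX_SCORE, a unit used by the previous insn (distance 0)
   contributes nothing, and intermediate distances scale linearly. */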
14105 /* This function is called via hook TARGET_SCHED_REORDER before
14106 issuing one insn from list READY which contains *NREADYP entries.
14107 For target z10 it reorders load instructions to avoid early load
conflicts in the floating point pipeline. */
14110 s390_sched_reorder (FILE *file, int verbose,
14111 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14113 if (s390_tune == PROCESSOR_2097_Z10
14114 && reload_completed
14116 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14118 if (s390_tune >= PROCESSOR_2827_ZEC12
14119 && reload_completed
14123 int last_index = *nreadyp - 1;
14124 int max_index = -1;
14125 int max_score = -1;
14128 /* Just move the insn with the highest score to the top (the
14129 end) of the list. A full sort is not needed since a conflict
14130 in the hazard recognition cannot happen. So the top insn in
14131 the ready list will always be taken. */
14132 for (i = last_index; i >= 0; i--)
14136 if (recog_memoized (ready[i]) < 0)
14139 score = s390_sched_score (ready[i]);
14140 if (score > max_score)
14147 if (max_index != -1)
14149 if (max_index != last_index)
14151 tmp = ready[max_index];
14152 ready[max_index] = ready[last_index];
14153 ready[last_index] = tmp;
14157 ";;\t\tBACKEND: move insn %d to the top of list\n",
14158 INSN_UID (ready[last_index]));
14160 else if (verbose > 5)
14162 ";;\t\tBACKEND: best insn %d already on top\n",
14163 INSN_UID (ready[last_index]));
14168 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14171 for (i = last_index; i >= 0; i--)
14173 unsigned int sched_mask;
14174 rtx_insn *insn = ready[i];
14176 if (recog_memoized (insn) < 0)
14179 sched_mask = s390_get_sched_attrmask (insn);
14180 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14182 s390_sched_score (insn));
14183 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14184 ((M) & sched_mask) ? #ATTR : "");
14185 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14186 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14187 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14188 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14189 #undef PRINT_SCHED_ATTR
14190 if (s390_tune == PROCESSOR_2964_Z13)
14192 unsigned int unit_mask, m = 1;
14195 unit_mask = s390_get_unit_mask (insn, &units);
14196 fprintf (file, "(units:");
14197 for (j = 0; j < units; j++, m <<= 1)
14199 fprintf (file, " u%d", j);
14200 fprintf (file, ")");
14202 fprintf (file, "\n");
14207 return s390_issue_rate ();
14211 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14212 the scheduler has issued INSN. It stores the last issued insn into
14213 last_scheduled_insn in order to make it available for
14214 s390_sched_reorder. */
14216 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14218 last_scheduled_insn = insn;
14220 if (s390_tune >= PROCESSOR_2827_ZEC12
14221 && reload_completed
14222 && recog_memoized (insn) >= 0)
14224 unsigned int mask = s390_get_sched_attrmask (insn);
14226 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14227 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14228 s390_sched_state = S390_SCHED_STATE_CRACKED;
14229 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14230 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14231 s390_sched_state = S390_SCHED_STATE_NORMAL;
14234 /* Only normal insns are left (mask == 0). */
14235 switch (s390_sched_state)
14240 case S390_SCHED_STATE_NORMAL:
14241 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14242 s390_sched_state = 1;
14244 s390_sched_state++;
14247 case S390_SCHED_STATE_CRACKED:
14248 s390_sched_state = S390_SCHED_STATE_NORMAL;
14253 if (s390_tune == PROCESSOR_2964_Z13)
14256 unsigned unit_mask, m = 1;
14258 unit_mask = s390_get_unit_mask (insn, &units);
14259 gcc_assert (units <= MAX_SCHED_UNITS);
14261 for (i = 0; i < units; i++, m <<= 1)
14263 last_scheduled_unit_distance[i] = 0;
14264 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14265 last_scheduled_unit_distance[i]++;
14270 unsigned int sched_mask;
14272 sched_mask = s390_get_sched_attrmask (insn);
14274 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14275 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14276 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14277 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14278 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14279 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14280 #undef PRINT_SCHED_ATTR
14282 if (s390_tune == PROCESSOR_2964_Z13)
14284 unsigned int unit_mask, m = 1;
14287 unit_mask = s390_get_unit_mask (insn, &units);
14288 fprintf (file, "(units:");
14289 for (j = 0; j < units; j++, m <<= 1)
14291 fprintf (file, " %d", j);
14292 fprintf (file, ")");
14294 fprintf (file, " sched state: %d\n", s390_sched_state);
14296 if (s390_tune == PROCESSOR_2964_Z13)
14300 s390_get_unit_mask (insn, &units);
14302 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14303 for (j = 0; j < units; j++)
14304 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14305 fprintf (file, "\n");
14310 if (GET_CODE (PATTERN (insn)) != USE
14311 && GET_CODE (PATTERN (insn)) != CLOBBER)
14318 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14319 int verbose ATTRIBUTE_UNUSED,
14320 int max_ready ATTRIBUTE_UNUSED)
14322 last_scheduled_insn = NULL;
14323 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14324 s390_sched_state = 0;
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
the number of times struct loop *loop should be unrolled when tuning for
CPUs with a built-in stride prefetcher.
The loop is analyzed for memory accesses by calling check_dpu for
each rtx of the loop. Depending on the loop_depth and the number of
memory accesses a new number <= nunroll is returned to improve the
behavior of the hardware prefetch unit. */
14335 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14340 unsigned mem_count = 0;
14342 if (s390_tune < PROCESSOR_2097_Z10)
14345 /* Count the number of memory references within the loop body. */
14346 bbs = get_loop_body (loop);
14347 subrtx_iterator::array_type array;
14348 for (i = 0; i < loop->num_nodes; i++)
14349 FOR_BB_INSNS (bbs[i], insn)
14350 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14351 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
/* Prevent division by zero; nunroll needs no adjustment in this case. */
14357 if (mem_count == 0)
14360 switch (loop_depth(loop))
14363 return MIN (nunroll, 28 / mem_count);
14365 return MIN (nunroll, 22 / mem_count);
14367 return MIN (nunroll, 16 / mem_count);
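/* Worked example (editorial, assuming the 28/mem_count arm corresponds
   to loop depth 1): a loop with 4 memory accesses gets
   MIN (nunroll, 28 / 4) == MIN (nunroll, 7); deeper nests shrink the
   budget via the 22 and 16 dividends. */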
/* Restore the current options. This is a hook function and also called internally. */
14375 s390_function_specific_restore (struct gcc_options *opts,
14376 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14378 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14382 s390_option_override_internal (bool main_args_p,
14383 struct gcc_options *opts,
14384 const struct gcc_options *opts_set)
14386 const char *prefix;
14387 const char *suffix;
14389 /* Set up prefix/suffix so the error messages refer to either the command
14390 line argument, or the attribute(target). */
14398 prefix = "option(\"";
14403 /* Architecture mode defaults according to ABI. */
14404 if (!(opts_set->x_target_flags & MASK_ZARCH))
14407 opts->x_target_flags |= MASK_ZARCH;
14409 opts->x_target_flags &= ~MASK_ZARCH;
14412 /* Set the march default in case it hasn't been specified on cmdline. */
14413 if (!opts_set->x_s390_arch)
14414 opts->x_s390_arch = PROCESSOR_2064_Z900;
14415 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14416 || opts->x_s390_arch == PROCESSOR_9672_G6)
14417 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14418 "in future releases; use at least %sarch=z900%s",
14419 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14420 suffix, prefix, suffix);
14422 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14424 /* Determine processor to tune for. */
14425 if (!opts_set->x_s390_tune)
14426 opts->x_s390_tune = opts->x_s390_arch;
14427 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14428 || opts->x_s390_tune == PROCESSOR_9672_G6)
14429 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14430 "in future releases; use at least %stune=z900%s",
14431 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14432 suffix, prefix, suffix);
14434 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14436 /* Sanity checks. */
14437 if (opts->x_s390_arch == PROCESSOR_NATIVE
14438 || opts->x_s390_tune == PROCESSOR_NATIVE)
14439 gcc_unreachable ();
14440 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14441 error ("z/Architecture mode not supported on %s",
14442 processor_table[(int)opts->x_s390_arch].name);
14443 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14444 error ("64-bit ABI not supported in ESA/390 mode");
14446 /* Enable hardware transactions if available and not explicitly
disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
14448 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14450 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14451 opts->x_target_flags |= MASK_OPT_HTM;
14453 opts->x_target_flags &= ~MASK_OPT_HTM;
14456 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14458 if (TARGET_OPT_VX_P (opts->x_target_flags))
14460 if (!TARGET_CPU_VX_P (opts))
14461 error ("hardware vector support not available on %s",
14462 processor_table[(int)opts->x_s390_arch].name);
14463 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14464 error ("hardware vector support not available with -msoft-float");
14469 if (TARGET_CPU_VX_P (opts))
14470 /* Enable vector support if available and not explicitly disabled
by the user, e.g. with -m31 -march=z13 -mzarch. */
14472 opts->x_target_flags |= MASK_OPT_VX;
14474 opts->x_target_flags &= ~MASK_OPT_VX;
14477 /* Use hardware DFP if available and not explicitly disabled by
the user, e.g. with -m31 -march=z10 -mzarch. */
14479 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14481 if (TARGET_DFP_P (opts))
14482 opts->x_target_flags |= MASK_HARD_DFP;
14484 opts->x_target_flags &= ~MASK_HARD_DFP;
14487 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14489 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14491 if (!TARGET_CPU_DFP_P (opts))
14492 error ("hardware decimal floating point instructions"
14493 " not available on %s",
14494 processor_table[(int)opts->x_s390_arch].name);
14495 if (!TARGET_ZARCH_P (opts->x_target_flags))
14496 error ("hardware decimal floating point instructions"
14497 " not available in ESA/390 mode");
14500 opts->x_target_flags &= ~MASK_HARD_DFP;
14503 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14504 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14506 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14507 && TARGET_HARD_DFP_P (opts->x_target_flags))
14508 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14510 opts->x_target_flags &= ~MASK_HARD_DFP;
14513 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14514 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14515 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14516 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14519 if (opts->x_s390_stack_size)
14521 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14522 error ("stack size must be greater than the stack guard value");
14523 else if (opts->x_s390_stack_size > 1 << 16)
14524 error ("stack size must not be greater than 64k");
14526 else if (opts->x_s390_stack_guard)
14527 error ("-mstack-guard implies use of -mstack-size");
14529 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14530 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14531 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14534 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14536 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14537 opts->x_param_values,
14538 opts_set->x_param_values);
14539 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14540 opts->x_param_values,
14541 opts_set->x_param_values);
14542 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14543 opts->x_param_values,
14544 opts_set->x_param_values);
14545 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14546 opts->x_param_values,
14547 opts_set->x_param_values);
14550 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14551 opts->x_param_values,
14552 opts_set->x_param_values);
/* Values for loop prefetching. */
14554 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14555 opts->x_param_values,
14556 opts_set->x_param_values);
14557 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14558 opts->x_param_values,
14559 opts_set->x_param_values);
/* s390 has more than 2 cache levels and their sizes are much larger. Since
we are always running virtualized, assume that we only get a small
part of the caches above L1. */
14563 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14564 opts->x_param_values,
14565 opts_set->x_param_values);
14566 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14567 opts->x_param_values,
14568 opts_set->x_param_values);
14569 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14570 opts->x_param_values,
14571 opts_set->x_param_values);
14573 /* Use the alternative scheduling-pressure algorithm by default. */
14574 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14575 opts->x_param_values,
14576 opts_set->x_param_values);
14578 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
14579 opts->x_param_values,
14580 opts_set->x_param_values);
14582 /* Call target specific restore function to do post-init work. At the moment,
14583 this just sets opts->x_s390_cost_pointer. */
14584 s390_function_specific_restore (opts, NULL);
14588 s390_option_override (void)
14591 cl_deferred_option *opt;
14592 vec<cl_deferred_option> *v =
14593 (vec<cl_deferred_option> *) s390_deferred_options;
14596 FOR_EACH_VEC_ELT (*v, i, opt)
14598 switch (opt->opt_index)
14600 case OPT_mhotpatch_:
14607 strncpy (s, opt->arg, 256);
14609 t = strchr (s, ',');
14614 val1 = integral_argument (s);
14615 val2 = integral_argument (t);
14622 if (val1 == -1 || val2 == -1)
/* Argument is not a plain number. */
14625 error ("arguments to %qs should be non-negative integers",
14629 else if (val1 > s390_hotpatch_hw_max
14630 || val2 > s390_hotpatch_hw_max)
14632 error ("argument to %qs is too large (max. %d)",
14633 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14636 s390_hotpatch_hw_before_label = val1;
14637 s390_hotpatch_hw_after_label = val2;
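/* Editorial example: -mhotpatch=1,2 parses into val1 == 1 and
   val2 == 2, i.e. one halfword of NOPs before the function label and
   two halfwords after it. */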
14641 gcc_unreachable ();
14645 /* Set up function hooks. */
14646 init_machine_status = s390_init_machine_status;
14648 s390_option_override_internal (true, &global_options, &global_options_set);
/* Save the initial options in case the user does function specific options. */
14652 target_option_default_node = build_target_option_node (&global_options);
14653 target_option_current_node = target_option_default_node;
14655 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14656 requires the arch flags to be evaluated already. Since prefetching
14657 is beneficial on s390, we enable it if available. */
14658 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14659 flag_prefetch_loop_arrays = 1;
14663 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14664 debuggers do not yet support DWARF 3/4. */
14665 if (!global_options_set.x_dwarf_strict)
14667 if (!global_options_set.x_dwarf_version)
14671 /* Register a target-specific optimization-and-lowering pass
14672 to run immediately before prologue and epilogue generation.
Registering the pass must be done at startup. It's
14675 convenient to do it here. */
14676 opt_pass *new_pass = new pass_s390_early_mach (g);
14677 struct register_pass_info insert_pass_s390_early_mach =
14679 new_pass, /* pass */
14680 "pro_and_epilogue", /* reference_pass_name */
14681 1, /* ref_pass_instance_number */
14682 PASS_POS_INSERT_BEFORE /* po_op */
14684 register_pass (&insert_pass_s390_early_mach);
14687 #if S390_USE_TARGET_ATTRIBUTE
/* Inner function to process the attribute((target(...))); take an argument and
set the current options from the argument. If we have a list, recursively go
over the list. */
14693 s390_valid_target_attribute_inner_p (tree args,
14694 struct gcc_options *opts,
14695 struct gcc_options *new_opts_set,
14701 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14702 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14703 static const struct
14705 const char *string;
14709 int only_as_pragma;
14712 S390_ATTRIB ("arch=", OPT_march_, 1),
14713 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14714 /* uinteger options */
14715 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14716 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14717 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14718 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14720 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14721 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14722 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14723 S390_ATTRIB ("htm", OPT_mhtm, 0),
14724 S390_ATTRIB ("vx", OPT_mvx, 0),
14725 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14726 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14727 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14728 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14729 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14730 /* boolean options */
14731 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14736 /* If this is a list, recurse to get the options. */
14737 if (TREE_CODE (args) == TREE_LIST)
14740 int num_pragma_values;
14743 /* Note: attribs.c:decl_attributes prepends the values from
14744 current_target_pragma to the list of target attributes. To determine
14745 whether we're looking at a value of the attribute or the pragma we
14746 assume that the first [list_length (current_target_pragma)] values in
14747 the list are the values from the pragma. */
14748 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14749 ? list_length (current_target_pragma) : 0;
14750 for (i = 0; args; args = TREE_CHAIN (args), i++)
14754 is_pragma = (force_pragma || i < num_pragma_values);
14755 if (TREE_VALUE (args)
14756 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14757 opts, new_opts_set,
14766 else if (TREE_CODE (args) != STRING_CST)
14768 error ("attribute %<target%> argument not a string");
14772 /* Handle multiple arguments separated by commas. */
14773 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14775 while (next_optstr && *next_optstr != '\0')
14777 char *p = next_optstr;
14779 char *comma = strchr (next_optstr, ',');
14780 size_t len, opt_len;
14786 enum cl_var_type var_type;
14792 len = comma - next_optstr;
14793 next_optstr = comma + 1;
14798 next_optstr = NULL;
14801 /* Recognize no-xxx. */
14802 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14811 /* Find the option. */
14814 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14816 opt_len = attrs[i].len;
14817 if (ch == attrs[i].string[0]
14818 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14819 && memcmp (p, attrs[i].string, opt_len) == 0)
14821 opt = attrs[i].opt;
14822 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14824 mask = cl_options[opt].var_value;
14825 var_type = cl_options[opt].var_type;
14831 /* Process the option. */
14834 error ("attribute(target(\"%s\")) is unknown", orig_p);
14837 else if (attrs[i].only_as_pragma && !force_pragma)
14839 /* Value is not allowed for the target attribute. */
14840 error ("value %qs is not supported by attribute %<target%>",
14845 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14847 if (var_type == CLVC_BIT_CLEAR)
14848 opt_set_p = !opt_set_p;
14851 opts->x_target_flags |= mask;
14853 opts->x_target_flags &= ~mask;
14854 new_opts_set->x_target_flags |= mask;
14857 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14861 if (cl_options[opt].cl_uinteger)
14863 /* Unsigned integer argument. Code based on the function
14864 decode_cmdline_option () in opts-common.c. */
14865 value = integral_argument (p + opt_len);
14868 value = (opt_set_p) ? 1 : 0;
14872 struct cl_decoded_option decoded;
14874 /* Value range check; only implemented for numeric and boolean
14875 options at the moment. */
14876 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14877 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14878 set_option (opts, new_opts_set, opt, value,
14879 p + opt_len, DK_UNSPECIFIED, input_location,
14884 error ("attribute(target(\"%s\")) is unknown", orig_p);
14889 else if (cl_options[opt].var_type == CLVC_ENUM)
14894 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14896 set_option (opts, new_opts_set, opt, value,
14897 p + opt_len, DK_UNSPECIFIED, input_location,
14901 error ("attribute(target(\"%s\")) is unknown", orig_p);
14907 gcc_unreachable ();
14912 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14915 s390_valid_target_attribute_tree (tree args,
14916 struct gcc_options *opts,
14917 const struct gcc_options *opts_set,
14920 tree t = NULL_TREE;
14921 struct gcc_options new_opts_set;
14923 memset (&new_opts_set, 0, sizeof (new_opts_set));
14925 /* Process each of the options on the chain. */
14926 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14928 return error_mark_node;
14930 /* If some option was set (even if it has not changed), rerun
14931 s390_option_override_internal, and then save the options away. */
14932 if (new_opts_set.x_target_flags
14933 || new_opts_set.x_s390_arch
14934 || new_opts_set.x_s390_tune
14935 || new_opts_set.x_s390_stack_guard
14936 || new_opts_set.x_s390_stack_size
14937 || new_opts_set.x_s390_branch_cost
14938 || new_opts_set.x_s390_warn_framesize
14939 || new_opts_set.x_s390_warn_dynamicstack_p)
14941 const unsigned char *src = (const unsigned char *)opts_set;
14942 unsigned char *dest = (unsigned char *)&new_opts_set;
14945 /* Merge the original option flags into the new ones. */
14946 for (i = 0; i < sizeof(*opts_set); i++)
14949 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
14950 s390_option_override_internal (false, opts, &new_opts_set);
/* Save the current options unless we are validating options for #pragma. */
14953 t = build_target_option_node (opts);
14958 /* Hook to validate attribute((target("string"))). */
14961 s390_valid_target_attribute_p (tree fndecl,
14962 tree ARG_UNUSED (name),
14964 int ARG_UNUSED (flags))
14966 struct gcc_options func_options;
14967 tree new_target, new_optimize;
14970 /* attribute((target("default"))) does nothing, beyond
14971 affecting multi-versioning. */
14972 if (TREE_VALUE (args)
14973 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
14974 && TREE_CHAIN (args) == NULL_TREE
14975 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
14978 tree old_optimize = build_optimization_node (&global_options);
14980 /* Get the optimization options of the current function. */
14981 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14983 if (!func_optimize)
14984 func_optimize = old_optimize;
14986 /* Init func_options. */
14987 memset (&func_options, 0, sizeof (func_options));
14988 init_options_struct (&func_options, NULL);
14989 lang_hooks.init_options_struct (&func_options);
14991 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
/* Initialize func_options to the default before its target options can be set. */
14995 cl_target_option_restore (&func_options,
14996 TREE_TARGET_OPTION (target_option_default_node));
14998 new_target = s390_valid_target_attribute_tree (args, &func_options,
14999 &global_options_set,
15001 current_target_pragma));
15002 new_optimize = build_optimization_node (&func_options);
15003 if (new_target == error_mark_node)
15005 else if (fndecl && new_target)
15007 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15008 if (old_optimize != new_optimize)
15009 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl cache. */
15018 s390_activate_target_options (tree new_tree)
15020 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15021 if (TREE_TARGET_GLOBALS (new_tree))
15022 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15023 else if (new_tree == target_option_default_node)
15024 restore_target_globals (&default_target_globals);
15026 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15027 s390_previous_fndecl = NULL_TREE;
15030 /* Establish appropriate back-end context for processing the function
15031 FNDECL. The argument might be NULL to indicate processing at top
15032 level, outside of any function scope. */
15034 s390_set_current_function (tree fndecl)
15036 /* Only change the context if the function changes. This hook is called
15037 several times in the course of compiling a function, and we don't want to
15038 slow things down too much or call target_reinit when it isn't safe. */
15039 if (fndecl == s390_previous_fndecl)
15043 if (s390_previous_fndecl == NULL_TREE)
15044 old_tree = target_option_current_node;
15045 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15046 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15048 old_tree = target_option_default_node;
15050 if (fndecl == NULL_TREE)
15052 if (old_tree != target_option_current_node)
15053 s390_activate_target_options (target_option_current_node);
15057 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15058 if (new_tree == NULL_TREE)
15059 new_tree = target_option_default_node;
15061 if (old_tree != new_tree)
15062 s390_activate_target_options (new_tree);
15063 s390_previous_fndecl = fndecl;
15067 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15070 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15071 unsigned int align ATTRIBUTE_UNUSED,
15072 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15073 bool speed_p ATTRIBUTE_UNUSED)
15075 return (size == 1 || size == 2
15076 || size == 4 || (TARGET_ZARCH && size == 8));
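/* Editorial note: 1-, 2- and 4-byte blocks are thus always handled by
   the by-pieces infrastructure, while 8-byte blocks qualify only when
   64-bit GPR instructions are available (TARGET_ZARCH). */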
15079 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15082 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15084 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15085 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15086 tree call_efpc = build_call_expr (efpc, 0);
15087 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15089 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15090 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15091 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15092 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15093 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15094 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15096 /* Generates the equivalent of feholdexcept (&fenv_var)
15098 fenv_var = __builtin_s390_efpc ();
15099 __builtin_s390_sfpc (fenv_var & mask) */
15100 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15102 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15103 build_int_cst (unsigned_type_node,
15104 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15105 FPC_EXCEPTION_MASK)));
15106 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15107 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15109 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15111 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15112 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15113 build_int_cst (unsigned_type_node,
15114 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15115 *clear = build_call_expr (sfpc, 1, new_fpc);
15117 /* Generates the equivalent of feupdateenv (fenv_var)
15119 old_fpc = __builtin_s390_efpc ();
15120 __builtin_s390_sfpc (fenv_var);
15121 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15123 old_fpc = create_tmp_var_raw (unsigned_type_node);
15124 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15125 old_fpc, call_efpc);
15127 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15129 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15130 build_int_cst (unsigned_type_node,
15132 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15133 build_int_cst (unsigned_type_node,
15135 tree atomic_feraiseexcept
15136 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15137 raise_old_except = build_call_expr (atomic_feraiseexcept,
15138 1, raise_old_except);
15140 *update = build2 (COMPOUND_EXPR, void_type_node,
15141 build2 (COMPOUND_EXPR, void_type_node,
15142 store_old_fpc, set_new_fpc),
15145 #undef FPC_EXCEPTION_MASK
15146 #undef FPC_FLAGS_MASK
15147 #undef FPC_DXC_MASK
15148 #undef FPC_EXCEPTION_MASK_SHIFT
15149 #undef FPC_FLAGS_SHIFT
15150 #undef FPC_DXC_SHIFT
/* Return the vector mode to be used for inner mode MODE when doing vectorization. */
15155 static machine_mode
15156 s390_preferred_simd_mode (machine_mode mode)
15176 /* Our hardware does not require vectors to be strictly aligned. */
15178 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15179 const_tree type ATTRIBUTE_UNUSED,
15180 int misalignment ATTRIBUTE_UNUSED,
15181 bool is_packed ATTRIBUTE_UNUSED)
15186 return default_builtin_support_vector_misalignment (mode, type, misalignment,
/* The vector ABI requires vector types to be aligned on an 8 byte
boundary (our stack alignment). However, we allow this to be
overridden by the user, although doing so definitely breaks the ABI. */
15193 static HOST_WIDE_INT
15194 s390_vector_alignment (const_tree type)
15196 if (!TARGET_VX_ABI)
15197 return default_vector_alignment (type);
15199 if (TYPE_USER_ALIGN (type))
15200 return TYPE_ALIGN (type);
15202 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
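/* Worked example (editorial): TYPE_SIZE is in bits, so a 16-byte
   vector gets MIN (64, 128) == 64 bits, i.e. 8-byte alignment, while a
   4-byte vector keeps its natural 32-bit alignment. */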
15205 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15206 /* Implement TARGET_ASM_FILE_START. */
15208 s390_asm_file_start (void)
15210 default_file_start ();
15211 s390_asm_output_machine_for_arch (asm_out_file);
15215 /* Implement TARGET_ASM_FILE_END. */
15217 s390_asm_file_end (void)
15219 #ifdef HAVE_AS_GNU_ATTRIBUTE
15220 varpool_node *vnode;
15221 cgraph_node *cnode;
15223 FOR_EACH_VARIABLE (vnode)
15224 if (TREE_PUBLIC (vnode->decl))
15225 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15227 FOR_EACH_FUNCTION (cnode)
15228 if (TREE_PUBLIC (cnode->decl))
15229 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15232 if (s390_vector_abi != 0)
15233 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15236 file_end_indicate_exec_stack ();
15238 if (flag_split_stack)
15239 file_end_indicate_split_stack ();
15242 /* Return true if TYPE is a vector bool type. */
15244 s390_vector_bool_type_p (const_tree type)
15246 return TYPE_VECTOR_OPAQUE (type);
15249 /* Return the diagnostic message string if the binary operation OP is
15250 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15252 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15254 bool bool1_p, bool2_p;
15258 machine_mode mode1, mode2;
15260 if (!TARGET_ZVECTOR)
15263 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15266 bool1_p = s390_vector_bool_type_p (type1);
15267 bool2_p = s390_vector_bool_type_p (type2);
/* Mixing signed and unsigned types is forbidden for all vector operations. */
15271 if (!bool1_p && !bool2_p
15272 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15273 return N_("types differ in signedness");
15275 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15276 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15277 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15278 || op == ROUND_DIV_EXPR);
15279 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15280 || op == EQ_EXPR || op == NE_EXPR);
15282 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15283 return N_("binary operator does not support two vector bool operands");
15285 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15286 return N_("binary operator does not support vector bool operand");
15288 mode1 = TYPE_MODE (type1);
15289 mode2 = TYPE_MODE (type2);
15291 if (bool1_p != bool2_p && plusminus_p
15292 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15293 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15294 return N_("binary operator does not support mixing vector "
15295 "bool with floating point vector operands");
15300 /* Implement TARGET_C_EXCESS_PRECISION.
15302 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15303 double on s390, causing operations on float_t to operate in a higher
15304 precision than is necessary. However, it is not the case that SFmode
operations have implicit excess precision, and we generate better
15306 code if we let the compiler know no implicit extra precision is added.
15308 That means when we are compiling with -fexcess-precision=fast, the value
15309 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15310 float_t (though they would be correct for -fexcess-precision=standard).
15312 A complete fix would modify glibc to remove the unnecessary typedef
15313 of float_t to double. */
15315 static enum flt_eval_method
15316 s390_excess_precision (enum excess_precision_type type)
15320 case EXCESS_PRECISION_TYPE_IMPLICIT:
15321 case EXCESS_PRECISION_TYPE_FAST:
15322 /* The fastest type to promote to will always be the native type,
whether that occurs with implicit excess precision or otherwise. */
15325 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15326 case EXCESS_PRECISION_TYPE_STANDARD:
15327 /* Otherwise, when we are in a standards compliant mode, to
15328 ensure consistency with the implementation in glibc, report that
15329 float is evaluated to the range and precision of double. */
15330 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15332 gcc_unreachable ();
15334 return FLT_EVAL_METHOD_UNPREDICTABLE;
15337 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
15339 static unsigned HOST_WIDE_INT
15340 s390_asan_shadow_offset (void)
15342 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
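/* Editorial note (standard ASan mapping assumed): with
   shadow = (address >> 3) + offset, the shadow memory starts at
   1 << 52 for 64-bit code and at 0x20000000 for 31-bit code. */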
15345 /* Initialize GCC target structure. */
15347 #undef TARGET_ASM_ALIGNED_HI_OP
15348 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15349 #undef TARGET_ASM_ALIGNED_DI_OP
15350 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15351 #undef TARGET_ASM_INTEGER
15352 #define TARGET_ASM_INTEGER s390_assemble_integer
15354 #undef TARGET_ASM_OPEN_PAREN
15355 #define TARGET_ASM_OPEN_PAREN ""
15357 #undef TARGET_ASM_CLOSE_PAREN
15358 #define TARGET_ASM_CLOSE_PAREN ""
15360 #undef TARGET_OPTION_OVERRIDE
15361 #define TARGET_OPTION_OVERRIDE s390_option_override
15363 #ifdef TARGET_THREAD_SSP_OFFSET
15364 #undef TARGET_STACK_PROTECT_GUARD
15365 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15368 #undef TARGET_ENCODE_SECTION_INFO
15369 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15371 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15372 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15375 #undef TARGET_HAVE_TLS
15376 #define TARGET_HAVE_TLS true
15378 #undef TARGET_CANNOT_FORCE_CONST_MEM
15379 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15381 #undef TARGET_DELEGITIMIZE_ADDRESS
15382 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15384 #undef TARGET_LEGITIMIZE_ADDRESS
15385 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15387 #undef TARGET_RETURN_IN_MEMORY
15388 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15390 #undef TARGET_INIT_BUILTINS
15391 #define TARGET_INIT_BUILTINS s390_init_builtins
15392 #undef TARGET_EXPAND_BUILTIN
15393 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15394 #undef TARGET_BUILTIN_DECL
15395 #define TARGET_BUILTIN_DECL s390_builtin_decl
15397 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15398 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15400 #undef TARGET_ASM_OUTPUT_MI_THUNK
15401 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15405 #undef TARGET_C_EXCESS_PRECISION
15406 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
15408 #undef TARGET_SCHED_ADJUST_PRIORITY
15409 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15410 #undef TARGET_SCHED_ISSUE_RATE
15411 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15412 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15413 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15415 #undef TARGET_SCHED_VARIABLE_ISSUE
15416 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15417 #undef TARGET_SCHED_REORDER
15418 #define TARGET_SCHED_REORDER s390_sched_reorder
15419 #undef TARGET_SCHED_INIT
15420 #define TARGET_SCHED_INIT s390_sched_init
15422 #undef TARGET_CANNOT_COPY_INSN_P
15423 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15424 #undef TARGET_RTX_COSTS
15425 #define TARGET_RTX_COSTS s390_rtx_costs
15426 #undef TARGET_ADDRESS_COST
15427 #define TARGET_ADDRESS_COST s390_address_cost
15428 #undef TARGET_REGISTER_MOVE_COST
15429 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15430 #undef TARGET_MEMORY_MOVE_COST
15431 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15434 s390_builtin_vectorization_cost
15436 #undef TARGET_MACHINE_DEPENDENT_REORG
15437 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15439 #undef TARGET_VALID_POINTER_MODE
15440 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15442 #undef TARGET_BUILD_BUILTIN_VA_LIST
15443 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15444 #undef TARGET_EXPAND_BUILTIN_VA_START
15445 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15446 #undef TARGET_ASAN_SHADOW_OFFSET
15447 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
15448 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15449 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15451 #undef TARGET_PROMOTE_FUNCTION_MODE
15452 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15453 #undef TARGET_PASS_BY_REFERENCE
15454 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15456 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15457 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15458 #undef TARGET_FUNCTION_ARG
15459 #define TARGET_FUNCTION_ARG s390_function_arg
15460 #undef TARGET_FUNCTION_ARG_ADVANCE
15461 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15462 #undef TARGET_FUNCTION_VALUE
15463 #define TARGET_FUNCTION_VALUE s390_function_value
15464 #undef TARGET_LIBCALL_VALUE
15465 #define TARGET_LIBCALL_VALUE s390_libcall_value
15466 #undef TARGET_STRICT_ARGUMENT_NAMING
15467 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15469 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15470 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15472 #undef TARGET_FIXED_CONDITION_CODE_REGS
15473 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15475 #undef TARGET_CC_MODES_COMPATIBLE
15476 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15478 #undef TARGET_INVALID_WITHIN_DOLOOP
15479 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15482 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15483 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15486 #undef TARGET_DWARF_FRAME_REG_MODE
15487 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15489 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15490 #undef TARGET_MANGLE_TYPE
15491 #define TARGET_MANGLE_TYPE s390_mangle_type
15494 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15495 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15498 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15500 #undef TARGET_PREFERRED_RELOAD_CLASS
15501 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15503 #undef TARGET_SECONDARY_RELOAD
15504 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15506 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15507 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15509 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15510 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15512 #undef TARGET_LEGITIMATE_ADDRESS_P
15513 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15515 #undef TARGET_LEGITIMATE_CONSTANT_P
15516 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15518 #undef TARGET_LRA_P
15519 #define TARGET_LRA_P s390_lra_p
15521 #undef TARGET_CAN_ELIMINATE
15522 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15524 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15525 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15527 #undef TARGET_LOOP_UNROLL_ADJUST
15528 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15530 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15531 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15532 #undef TARGET_TRAMPOLINE_INIT
15533 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15536 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15537 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
15539 #undef TARGET_UNWIND_WORD_MODE
15540 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15542 #undef TARGET_CANONICALIZE_COMPARISON
15543 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15545 #undef TARGET_HARD_REGNO_SCRATCH_OK
15546 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15548 #undef TARGET_ATTRIBUTE_TABLE
15549 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15551 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15552 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15554 #undef TARGET_SET_UP_BY_PROLOGUE
15555 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15557 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15558 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15560 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15561 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15562 s390_use_by_pieces_infrastructure_p
15564 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15565 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15567 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15568 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15570 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15571 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15573 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15574 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15576 #undef TARGET_VECTOR_ALIGNMENT
15577 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15579 #undef TARGET_INVALID_BINARY_OP
15580 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15582 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15583 #undef TARGET_ASM_FILE_START
15584 #define TARGET_ASM_FILE_START s390_asm_file_start
15587 #undef TARGET_ASM_FILE_END
15588 #define TARGET_ASM_FILE_END s390_asm_file_end
15590 #if S390_USE_TARGET_ATTRIBUTE
15591 #undef TARGET_SET_CURRENT_FUNCTION
15592 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15594 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15595 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15598 #undef TARGET_OPTION_RESTORE
15599 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15601 struct gcc_target targetm = TARGET_INITIALIZER;
15603 #include "gt-s390.h"