/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of multiplication in TFmode.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of division in TFmode.  */
  const int ddbr;     /* cost of division in DFmode.  */
  const int debr;     /* cost of division in SFmode.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
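/* The tables below express costs as multiples of COSTS_N_INSNS (1),
   GCC's generic "cost of N simple insns" unit.  For example, with the
   z900 table below s390_cost->dsgr evaluates to COSTS_N_INSNS (32)
   once s390_cost_pointer has been set up accordingly (done elsewhere
   in this file).  */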
static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};
static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};
static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};
static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};
static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static struct
{
  const char *const name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "native", PROCESSOR_NATIVE,      NULL }
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100
/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Number of first and last gpr where slots in the register
     save area are reserved for.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
       0 - does not need to be saved at all
      -1 - stack slot  */
#define SAVE_SLOT_NONE   0
#define SAVE_SLOT_STACK -1
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     the FPRs.  */
  bool tbegin_p;

  /* For -fsplit-stack support: A stack local which holds a pointer to
     the stack arguments for a function with a variable number of
     arguments.  This is set at the start of the function and is used
     to initialize the overflow_arg_area field of the va_list
     structure.  */
  rtx split_stack_varargs_pointer;
};
/* Few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                          \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
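/* Example: cfun_set_fpr_save (FPR0_REGNUM + 8) sets bit 8 of fpr_bitmap
   (i.e. f8, per the bit layout documented above), after which
   cfun_fpr_save_p (FPR0_REGNUM + 8) evaluates to true.  */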
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8
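/* Note (per the s390x ELF ABI, not encoded in the counts above): on
   64 bit these correspond to r2-r6 for GPR arguments, f0/f2/f4/f6 for
   FPR arguments and v24-v31 for vector arguments.  */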
/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
#define REGNO_PAIR_OK(REGNO, MODE) \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
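/* Example: a mode that needs a register pair (HARD_REGNO_NREGS != 1,
   e.g. TImode in GPRs) is only OK in an even-numbered register, so
   REGNO_PAIR_OK accepts r4 but rejects r5 for such a mode.  */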
/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
         only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
        return;

      /* In arguments vector types > 16 bytes are passed as before (GCC
         never enforced the bigger alignment for arguments which was
         required by the old vector ABI).  However, it might still be
         ABI relevant due to the changed alignment if it is a struct
         member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
        return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
         natural alignment there will never be ABI dependent padding
         in an array type.  That's why we do not set in_struct_p to
         true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
           arg_chain;
           arg_chain = TREE_CHAIN (arg_chain))
        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
        }
    }
}
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };
tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
                                       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;
#undef DEF_TYPE
#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)          \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] = (!CONST_P) ?            \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)     \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] =                         \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)    \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] =                         \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)    \
  if (s390_builtin_types[INDEX] == NULL)                        \
    s390_builtin_types[INDEX] =                                 \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)     \
  if (s390_builtin_types[INDEX] == NULL)                                \
    s390_builtin_types[INDEX] =                                         \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, BFLAGS, args...)             \
  if (s390_builtin_fn_types[INDEX] == NULL)             \
    s390_builtin_fn_types[INDEX] =                      \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)    \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)          \
    s390_builtin_decls[S390_BUILTIN_##NAME] =                   \
      add_builtin_function ("__builtin_" #NAME,                 \
                            s390_builtin_fn_types[FNTYPE],      \
                            S390_BUILTIN_##NAME,                \
                            BUILT_IN_MD,                        \
                            NULL,                               \
                            ATTRS);
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)     \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)                                                          \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,                         \
                            s390_builtin_fn_types[FNTYPE],              \
                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
                            BUILT_IN_MD,                                \
                            NULL,                                       \
                            NULL_TREE);
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
        {
          error ("constant argument %d for builtin %qF is out of range (0.."
                 HOST_WIDE_INT_PRINT_UNSIGNED ")",
                 argnum, decl,
                 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
          return false;
        }
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
          || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
          || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
        {
          error ("constant argument %d for builtin %qF is out of range ("
                 HOST_WIDE_INT_PRINT_DEC ".."
                 HOST_WIDE_INT_PRINT_DEC ")",
                 argnum, decl,
                 -((HOST_WIDE_INT)1 << (bitwidth - 1)),
                 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
          return false;
        }
    }
  return true;
}
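/* Example: an O_U4 operand (bitwidth 4 via bitwidths[O_U4 - O_U1]) must
   lie in 0..15, an O_S8 operand in -128..127 (assuming consecutive O_Un
   and O_Sn enumerators, as the array indexing above implies).  */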
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
               bflags_for_builtin (fcode));
    }
  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
        {
          error ("Builtin %qF is not supported without -mhtm "
                 "(default with -march=zEC12 and higher).", fndecl);
          return const0_rtx;
        }
      if ((bflags & B_VX) && !TARGET_VX)
        {
          error ("Builtin %qF is not supported without -mvx "
                 "(default with -march=z13 and higher).", fndecl);
          return const0_rtx;
        }
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
         saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
        cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("Unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");
  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
         arguments but an element selector.  So we have to also look
         at the vector return type when emitting the modulo
         operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
        last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
        return NULL_RTX;
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      if (O_IMM_P (op_flags)
          && TREE_CODE (arg) != INTEGER_CST)
        {
          error ("constant value required for builtin %qF argument %d",
                 fndecl, arity + 1);
          return const0_rtx;
        }

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
        return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
         is "convenient".  However, our checks below rely on this
         fact.  */
      if (CONST_INT_P (op[arity])
          && SCALAR_INT_MODE_P (insn_op->mode)
          && GET_MODE (op[arity]) != insn_op->mode)
        op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
                                                 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
         the proper mode.  This allows us to use e.g. (match_operand
         "memory_operand"..) in the insn patterns instead of (mem
         (match_operand "address_operand)).  This is helpful for
         patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
          && insn_op->predicate != address_operand)
        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
        {
          gcc_assert (last_vec_mode != VOIDmode);
          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
                                             op[arity],
                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
                                             NULL_RTX, 1, OPTAB_DIRECT);
        }

      /* Record the vector mode used for an element selector.  This assumes:
         1. There is no builtin with two different vector modes and an element selector
         2. The element selector comes after the vector type it is referring to.
         This currently is true for all the builtins but FIXME we
         should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
        last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
        {
          arity++;
          continue;
        }

      if (MEM_P (op[arity])
          && insn_op->predicate == memory_operand
          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
        {
          op[arity] = replace_equiv_address (op[arity],
                                             copy_to_mode_reg (Pmode,
                                               XEXP (op[arity], 0)));
        }
      /* Some of the builtins require different modes/types than the
         pattern in order to implement a specific API.  Instead of
         adding many expanders which do the mode change we do it here.
         E.g. s390_vec_add_u128, which is required to have vector
         unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
               && GET_MODE (op[arity]) != VOIDmode
               && GET_MODE (op[arity]) != insn_op->mode
               && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
                                                   GET_MODE (op[arity]), 0))
                   != NULL_RTX))
        {
          op[arity] = tmp_rtx;
        }
      else if (GET_MODE (op[arity]) == insn_op->mode
               || GET_MODE (op[arity]) == VOIDmode
               || (insn_op->predicate == address_operand
                   && GET_MODE (op[arity]) == Pmode))
        {
          /* An address_operand usually has VOIDmode in the expander
             so we cannot use this.  */
          machine_mode target_mode =
            (insn_op->predicate == address_operand
             ? Pmode : insn_op->mode);
          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
        }

      if (!insn_op->predicate (op[arity], insn_op->mode))
        {
          error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
          return const0_rtx;
        }
      arity++;
    }
  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
           || wi::gtu_p (expr, s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
           || wi::gtu_p (expr2, s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
             " non-negative integer constants or too large (max. %d)", name,
             s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
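/* Usage sketch: a function declared as
     void f (void) __attribute__ ((hotpatch (1, 2)));
   requests one halfword of padding before and two halfwords after the
   function label; both values are halfword counts bounded by
   s390_hotpatch_hw_max above.  */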
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
      break;
    default: break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
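/* Usage sketch (assumption, following the vecintrin.h convention):
     typedef unsigned int bv4si __attribute__ ((vector_size (16),
                                                s390_vector_bool));
   ends up in the V4SImode case of the handler above and is rewritten
   to the BT_BV4SI boolean vector type.  */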
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
    case SFmode:
    case DFmode:
    case TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;
      return VOIDmode;

    case CCSmode:
    case CCUmode:
    case CCTmode:
    case CCSRmode:
    case CCURmode:
    case CCZ1mode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
              && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return true;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
    case CCVEQmode:
    case CCVIHmode:
    case CCVIHUmode:
    case CCVFHmode:
    case CCVFHEmode:
      if (req_mode != set_mode)
        return false;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode)
        return false;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
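/* Worked example for the mixed case: op1 == 16 + 128 (the mask) and
   op2 == 16 gives bit1 = exact_log2 (16) == 4 and
   bit0 = exact_log2 (144 ^ 16) == 7; since bit0 > bit1 the result is
   CCT1mode, matching the table in the comment above.  */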
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode))
    {
      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
         s390_emit_compare or s390_canonicalize_comparison will take
         care of it.  */
      switch (code)
        {
        case EQ:
        case NE:
          return CCVEQmode;
        case GT:
        case UNLE:
          return CCVFHmode;
        case GE:
        case UNLT:
          return CCVFHEmode;
        default:
          ;
        }
    }

  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
         -INT_MAX is used as parameter, which stays negative.  So
         we have an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP((op0), 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }
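  /* Worked example: for (zero_extract:SI x 2 4) compared against zero
     the rewrite above computes block = ((1 << 2) - 1) << (32 - 4 - 2),
     i.e. 0x0c000000, turning the test into (x & 0x0c000000) == 0, which
     the TM patterns can pick up.  */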
  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case CCZmode:
        case CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand.  */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;

      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

  /* Using the scalar variants of vector instructions for 64 bit FP
     comparisons might require swapping the operands.  */
  if (TARGET_VX
      && register_operand (*op0, DFmode)
      && register_operand (*op1, DFmode)
      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
    {
      rtx tmp;

      switch (*code)
        {
        case LT:   *code = GT;   break;
        case LE:   *code = GE;   break;
        case UNGT: *code = UNLE; break;
        case UNGE: *code = UNLT; break;
        default: gcc_unreachable ();
        }
      tmp = *op0; *op0 = *op1; *op1 = tmp;
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
        new_code = reversed_comparison_code_parts (GET_CODE (*op0),
                                                   XEXP (*op0, 0),
                                                   XEXP (*op0, 1), NULL);
      else
        new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
        {
          *code = (int) new_code;
          *op1 = XEXP (*op0, 1);
          *op0 = XEXP (*op0, 0);
        }
    }
}
/* Helper function for s390_emit_compare.  If possible emit a 64 bit
   FP compare using the single element variant of vector instructions.
   Replace CODE with the comparison code to be used in the CC reg
   compare and return the condition code register RTX in CC.  */

static bool
s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
                                rtx *cc)
{
  machine_mode cmp_mode;
  bool swap_p = false;

  switch (*code)
    {
    case EQ:   cmp_mode = CCVEQmode;  break;
    case NE:   cmp_mode = CCVEQmode;  break;
    case GT:   cmp_mode = CCVFHmode;  break;
    case GE:   cmp_mode = CCVFHEmode; break;
    case UNLE: cmp_mode = CCVFHmode;  break;
    case UNLT: cmp_mode = CCVFHEmode; break;
    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
    default: return false;
    }

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }

  emit_insn (gen_rtx_PARALLEL (VOIDmode,
               gen_rtvec (2,
                          gen_rtx_SET (gen_rtx_REG (cmp_mode, CC_REGNUM),
                                       gen_rtx_COMPARE (cmp_mode, cmp1,
                                                        cmp2)),
                          gen_rtx_CLOBBER (VOIDmode,
                                           gen_rtx_SCRATCH (V2DImode)))));

  /* This is the cc reg as it will be used in the cc mode consumer.
     It either needs to be CCVFALL or CCVFANY.  However, CC1 will
     never be set by the scalar variants.  So it actually doesn't
     matter which one we choose here.  */
  *cc = gen_rtx_REG (CCVFALLmode, CC_REGNUM);
  return true;
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode)
      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
    {
      /* Work has been done by s390_expand_vec_compare_scalar already.  */
    }
  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
         compare_and_swap pattern already computed the result and the
         machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
                            const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
              || (GET_MODE (XEXP (code, 0)) == CCRAWmode
                  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC1;
        case NE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT2mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC2;
        case NE: return CC0 | CC1 | CC3;
        default: return -1;
        }
      break;

    case CCT3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC3;
        case NE: return CC0 | CC1 | CC2;
        default: return -1;
        }
      break;

    case CCLmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        default: return -1;
        }
      break;

    case CCL1mode:
      switch (GET_CODE (code))
        {
        case LTU: return CC2 | CC3;  /* carry */
        case GEU: return CC0 | CC1;  /* no carry */
        default: return -1;
        }
      break;

    case CCL2mode:
      switch (GET_CODE (code))
        {
        case GTU: return CC0 | CC1;  /* borrow */
        case LEU: return CC2 | CC3;  /* no borrow */
        default: return -1;
        }
      break;

    case CCL3mode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0 | CC2;
        case NE:  return CC1 | CC3;
        case LTU: return CC1;
        case GTU: return CC3;
        case LEU: return CC1 | CC2;
        case GEU: return CC2 | CC3;
        default: return -1;
        }
      break;

    case CCUmode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0;
        case NE:  return CC1 | CC2 | CC3;
        case LTU: return CC1;
        case GTU: return CC2;
        case LEU: return CC0 | CC1;
        case GEU: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCURmode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0;
        case NE:  return CC2 | CC1 | CC3;
        case LTU: return CC2;
        case GTU: return CC1;
        case LEU: return CC0 | CC2;
        case GEU: return CC0 | CC1;
        default: return -1;
        }
      break;

    case CCAPmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1 | CC3;
        case GT: return CC2;
        case LE: return CC0 | CC1 | CC3;
        case GE: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCANmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2 | CC3;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCSmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LT:        return CC1;
        case GT:        return CC2;
        case LE:        return CC0 | CC1;
        case GE:        return CC0 | CC2;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC1 | CC2;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC1 | CC3;
        case UNGT:      return CC2 | CC3;
        case UNLE:      return CC0 | CC1 | CC3;
        case UNGE:      return CC0 | CC2 | CC3;
        case LTGT:      return CC1 | CC2;
        default: return -1;
        }
      break;

    case CCSRmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC2 | CC1 | CC3;
        case LT:        return CC2;
        case GT:        return CC1;
        case LE:        return CC0 | CC2;
        case GE:        return CC0 | CC1;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC2 | CC1;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC2 | CC3;
        case UNGT:      return CC1 | CC3;
        case UNLE:      return CC0 | CC2 | CC3;
        case UNGE:      return CC0 | CC1 | CC3;
        case LTGT:      return CC2 | CC1;
        default: return -1;
        }
      break;

      /* Vector comparison modes.  */
      /* CC2 will never be set.  It however is part of the negated
         masks.  */
    case CCVIALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE: return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT: return CC3 | CC1 | CC2;
        default: return -1;
        }

    case CCVIANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE: return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT: return CC3 | CC2;
        default: return -1;
        }

    case CCVFALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE: return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case UNLE:
        case UNLT: return CC3 | CC1 | CC2;
        default: return -1;
        }

    case CCVFANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE: return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case UNLE:
        case UNLT: return CC3 | CC2;
        default: return -1;
        }

    case CCRAWmode:
      switch (GET_CODE (code))
        {
        case EQ:
          return INTVAL (XEXP (code, 1));
        case NE:
          return (INTVAL (XEXP (code, 1))) ^ 0xf;
        default:
          return -1;
        }

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}

/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
          || (GET_MODE (XEXP (code, 0)) == CCRAWmode
              && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
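/* Worked example: for EQ on a CC register the mask is CC0 == 8 and
   mnemonic[8] is "e"; inverting gives 8 ^ 15 == 7, i.e. "ne".  */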
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
        return value & part_mask;
    }

  gcc_unreachable ();
}
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
                  machine_mode mode,
                  machine_mode part_mode,
                  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
        {
          if (part != -1)
            return -1;
          else
            part = i;
        }
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
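/* Worked example: for op == 0x0000ffff00000000 in DImode with HImode
   parts and def == 0, only the third part from the right differs from
   def, so part == 2 and the function returns 4 - 1 - 2 == 1, i.e. parts
   are numbered starting from the most significant one.  */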
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
                                  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - 1;
  int highbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - size;
  unsigned HOST_WIDE_INT bitmask = 1ULL;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
        /* Look for the rightmost bit of a contiguous range of ones.  */
        if (bitmask & in)
          /* Found it.  */
          end = start;
      }
    else
      {
        /* Look for the first zero bit after the range of ones.  */
        if (! (bitmask & in))
          /* Found it.  */
          break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~(0ULL) >> highbit) & (~(0ULL) << (lowbit - start + 1)));
      if (mask & in)
        /* There are more bits set beyond the first range of one bits.  */
        return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
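/* Worked example: s390_contiguous_bitmask_nowrap_p (0x0ff0, 16, &s, &e)
   returns true with s == 52 and e == 59, since the eight one-bits sit at
   positions 52..59 counting from the highest order bit of the 64 bit
   value.  */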
/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
                           int size, int *start, int *end)
{
  int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~(0ULL)) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a contiguous bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (! wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
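/* Worked example: for in == 0xff000000000000ff with size 64 and wrap_p
   true, ~in is the contiguous field at positions 8..55 (from the MSB),
   and the wraparound fixup above yields *start == 56 and *end == 7,
   i.e. *START > *END as documented.  */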
2374 /* Return true if OP contains the same contiguous bitfield in *all*
2375 its elements. START and END can be used to obtain the start and
2376 end position of the bitfield.
2378 START/END give the position of the first/last bit of the bitfield
2379 counting from the lowest order bit starting with zero. In order to
2380 use these values for S/390 instructions this has to be converted to
2381 "bits big endian" style. */
2384 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2386 unsigned HOST_WIDE_INT mask;
2391 gcc_assert (!!start == !!end);
2392 if (!const_vec_duplicate_p (op, &elt)
2393 || !CONST_INT_P (elt))
2396 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2398 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2402 mask = UINTVAL (elt);
2404 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2409 int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
2411 *start -= (bs - size);
2412 *end -= (bs - size);
2420 /* Return true if OP consists only of byte chunks being either 0 or
2421 0xff. If MASK is != NULL a byte mask is generated which is
2422 appropriate for the vector generate byte mask instruction. */
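/* For illustration: OP == (const_vector:V2DI [(const_int 255)
   (const_int -1)]) consists only of 0x00 and 0xff bytes, so the
   function returns true and, with a non-NULL MASK, produces 0x01ff --
   one mask bit per vector byte, the most significant mask bit
   corresponding to byte 0.  */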
2425 s390_bytemask_vector_p (rtx op, unsigned *mask)
2428 unsigned tmp_mask = 0;
2429 int nunit, unit_size;
2431 if (!VECTOR_MODE_P (GET_MODE (op))
2432 || GET_CODE (op) != CONST_VECTOR
2433 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2436 nunit = GET_MODE_NUNITS (GET_MODE (op));
2437 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2439 for (i = 0; i < nunit; i++)
2441 unsigned HOST_WIDE_INT c;
2444 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2447 c = UINTVAL (XVECEXP (op, 0, i));
2448 for (j = 0; j < unit_size; j++)
2450 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2452 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2453 c = c >> BITS_PER_UNIT;
2463 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2464 equivalent to a shift followed by the AND. In particular, CONTIG
2465 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2466 for ROTL indicate a rotate to the right. */
2469 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2474 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2478 return (64 - end >= rotl);
2481 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in DImode. */
2483 rotl = -rotl + (64 - bitsize);
2484 return (start >= rotl);
2488 /* Check whether we can (and want to) split a double-word
2489 move in mode MODE from SRC to DST into two single-word
2490 moves, moving the subword FIRST_SUBWORD first. */
2493 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2495 /* Floating point and vector registers cannot be split. */
2496 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2499 /* We don't need to split if operands are directly accessible. */
2500 if (s_operand (src, mode) || s_operand (dst, mode))
2503 /* Non-offsettable memory references cannot be split. */
2504 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2505 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2508 /* Moving the first subword must not clobber a register
2509 needed to move the second subword. */
2510 if (register_operand (dst, mode))
2512 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2513 if (reg_overlap_mentioned_p (subreg, src))
2520 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2521 and [MEM2, MEM2 + SIZE] do overlap and false otherwise. */
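/* For illustration: two accesses based on the same register at offsets
   0 and 4 overlap for SIZE == 8 (delta 4 < 8, return true) but not for
   SIZE == 4 (return false).  If the address difference does not
   simplify to a CONST_INT, false is returned as well.  */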
2525 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2527 rtx addr1, addr2, addr_delta;
2528 HOST_WIDE_INT delta;
2530 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2536 addr1 = XEXP (mem1, 0);
2537 addr2 = XEXP (mem2, 0);
2539 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2541 /* This overlapping check is used by peepholes merging memory block operations.
2542 Overlapping operations would otherwise be recognized by the S/390 hardware
2543 and would fall back to a slower implementation. Allowing overlapping
2544 operations would lead to slow code but not to wrong code. Therefore we are
2545 somewhat optimistic if we cannot prove that the memory blocks are really distinct.
2547 That's why we return false here although this may accept operations on
2548 overlapping memory areas. */
2549 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2552 delta = INTVAL (addr_delta);
2555 || (delta > 0 && delta < size)
2556 || (delta < 0 && -delta < size))
2562 /* Check whether the address of memory reference MEM2 equals exactly
2563 the address of memory reference MEM1 plus DELTA. Return true if
2564 we can prove this to be the case, false otherwise. */
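/* For illustration: with MEM1 at (reg R) and MEM2 at
   (plus (reg R) (const_int 8)) the address difference simplifies to
   (const_int 8), so the function returns true exactly for
   DELTA == (const_int 8).  */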
2567 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2569 rtx addr1, addr2, addr_delta;
2571 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2574 addr1 = XEXP (mem1, 0);
2575 addr2 = XEXP (mem2, 0);
2577 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2578 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2584 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2587 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2590 machine_mode wmode = mode;
2591 rtx dst = operands[0];
2592 rtx src1 = operands[1];
2593 rtx src2 = operands[2];
2596 /* If we cannot handle the operation directly, use a temp register. */
2597 if (!s390_logical_operator_ok_p (operands))
2598 dst = gen_reg_rtx (mode);
2600 /* QImode and HImode patterns make sense only if we have a destination
2601 in memory. Otherwise perform the operation in SImode. */
2602 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2605 /* Widen operands if required. */
2608 if (GET_CODE (dst) == SUBREG
2609 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2611 else if (REG_P (dst))
2612 dst = gen_rtx_SUBREG (wmode, dst, 0);
2614 dst = gen_reg_rtx (wmode);
2616 if (GET_CODE (src1) == SUBREG
2617 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2619 else if (GET_MODE (src1) != VOIDmode)
2620 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2622 if (GET_CODE (src2) == SUBREG
2623 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2625 else if (GET_MODE (src2) != VOIDmode)
2626 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2629 /* Emit the instruction. */
2630 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2634 /* Fix up the destination if needed. */
2635 if (dst != operands[0])
2636 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2639 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2642 s390_logical_operator_ok_p (rtx *operands)
2644 /* If the destination operand is in memory, it needs to coincide
2645 with one of the source operands. After reload, it has to be
2646 the first source operand. */
2647 if (GET_CODE (operands[0]) == MEM)
2648 return rtx_equal_p (operands[0], operands[1])
2649 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2654 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2655 operand IMMOP to switch from SS to SI type instructions. */
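/* For illustration: for an HImode AND with immediate 0x00ff the default
   part value is all ones, so only the most significant byte differs;
   *MEMOP is narrowed to that QImode byte and *IMMOP becomes the
   extracted byte mask 0x00.  */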
2658 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2660 int def = code == AND ? -1 : 0;
2664 gcc_assert (GET_CODE (*memop) == MEM);
2665 gcc_assert (!MEM_VOLATILE_P (*memop));
2667 mask = s390_extract_part (*immop, QImode, def);
2668 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2669 gcc_assert (part >= 0);
2671 *memop = adjust_address (*memop, QImode, part);
2672 *immop = gen_int_mode (mask, QImode);
2676 /* How to allocate a 'struct machine_function'. */
2678 static struct machine_function *
2679 s390_init_machine_status (void)
2681 return ggc_cleared_alloc<machine_function> ();
2684 /* Map for smallest class containing reg regno. */
2686 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2687 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2688 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2689 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2690 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2691 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2692 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2693 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2694 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2695 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2696 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2697 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2698 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2699 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2700 VEC_REGS, VEC_REGS /* 52 */
2703 /* Return attribute type of insn. */
2705 static enum attr_type
2706 s390_safe_attr_type (rtx_insn *insn)
2708 if (recog_memoized (insn) >= 0)
2709 return get_attr_type (insn);
2714 /* Return true if DISP is a valid short displacement. */
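/* For illustration: a short displacement is a 12-bit unsigned value
   (0 through 4095); the long displacement facility extends this to a
   20-bit signed range, hence the INTVAL check below.  */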
2717 s390_short_displacement (rtx disp)
2719 /* No displacement is OK. */
2723 /* Without the long displacement facility we don't need to
2724 distinguish between long and short displacement. */
2725 if (!TARGET_LONG_DISPLACEMENT)
2728 /* Integer displacement in range. */
2729 if (GET_CODE (disp) == CONST_INT)
2730 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2732 /* GOT offset is not OK, the GOT can be large. */
2733 if (GET_CODE (disp) == CONST
2734 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2735 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2736 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2739 /* All other symbolic constants are literal pool references,
2740 which are OK as the literal pool must be small. */
2741 if (GET_CODE (disp) == CONST)
2747 /* Decompose a RTL expression ADDR for a memory address into
2748 its components, returned in OUT.
2750 Returns false if ADDR is not a valid memory address, true
2751 otherwise. If OUT is NULL, don't return the components,
2752 but check for validity only.
2754 Note: Only addresses in canonical form are recognized.
2755 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2756 canonical form so that they will be recognized. */
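/* For illustration: (plus (reg %r2) (const_int 100)) decomposes into
   base %r2, no index, and displacement 100, whereas
   (plus (plus (reg %r1) (reg %r2)) (const_int 8)) yields index %r1,
   base %r2, and displacement 8.  */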
2759 s390_decompose_address (rtx addr, struct s390_address *out)
2761 HOST_WIDE_INT offset = 0;
2762 rtx base = NULL_RTX;
2763 rtx indx = NULL_RTX;
2764 rtx disp = NULL_RTX;
2766 bool pointer = false;
2767 bool base_ptr = false;
2768 bool indx_ptr = false;
2769 bool literal_pool = false;
2771 /* We may need to substitute the literal pool base register into the address
2772 below. However, at this point we do not know which register is going to
2773 be used as base, so we substitute the arg pointer register. This is going
2774 to be treated as holding a pointer below -- it shouldn't be used for any other purpose. */
2776 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2778 /* Decompose address into base + index + displacement. */
2780 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2783 else if (GET_CODE (addr) == PLUS)
2785 rtx op0 = XEXP (addr, 0);
2786 rtx op1 = XEXP (addr, 1);
2787 enum rtx_code code0 = GET_CODE (op0);
2788 enum rtx_code code1 = GET_CODE (op1);
2790 if (code0 == REG || code0 == UNSPEC)
2792 if (code1 == REG || code1 == UNSPEC)
2794 indx = op0; /* index + base */
2800 base = op0; /* base + displacement */
2805 else if (code0 == PLUS)
2807 indx = XEXP (op0, 0); /* index + base + disp */
2808 base = XEXP (op0, 1);
2819 disp = addr; /* displacement */
2821 /* Extract integer part of displacement. */
2825 if (GET_CODE (disp) == CONST_INT)
2827 offset = INTVAL (disp);
2830 else if (GET_CODE (disp) == CONST
2831 && GET_CODE (XEXP (disp, 0)) == PLUS
2832 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2834 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2835 disp = XEXP (XEXP (disp, 0), 0);
2839 /* Strip off CONST here to avoid special case tests later. */
2840 if (disp && GET_CODE (disp) == CONST)
2841 disp = XEXP (disp, 0);
2843 /* We can convert literal pool addresses to
2844 displacements by basing them off the base register. */
2845 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2847 /* Either base or index must be free to hold the base register. */
2849 base = fake_pool_base, literal_pool = true;
2851 indx = fake_pool_base, literal_pool = true;
2855 /* Mark up the displacement. */
2856 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2857 UNSPEC_LTREL_OFFSET);
2860 /* Validate base register. */
2863 if (GET_CODE (base) == UNSPEC)
2864 switch (XINT (base, 1))
2868 disp = gen_rtx_UNSPEC (Pmode,
2869 gen_rtvec (1, XVECEXP (base, 0, 0)),
2870 UNSPEC_LTREL_OFFSET);
2874 base = XVECEXP (base, 0, 1);
2877 case UNSPEC_LTREL_BASE:
2878 if (XVECLEN (base, 0) == 1)
2879 base = fake_pool_base, literal_pool = true;
2881 base = XVECEXP (base, 0, 1);
2888 if (!REG_P (base) || GET_MODE (base) != Pmode)
2891 if (REGNO (base) == STACK_POINTER_REGNUM
2892 || REGNO (base) == FRAME_POINTER_REGNUM
2893 || ((reload_completed || reload_in_progress)
2894 && frame_pointer_needed
2895 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2896 || REGNO (base) == ARG_POINTER_REGNUM
2898 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2899 pointer = base_ptr = true;
2901 if ((reload_completed || reload_in_progress)
2902 && base == cfun->machine->base_reg)
2903 pointer = base_ptr = literal_pool = true;
2906 /* Validate index register. */
2909 if (GET_CODE (indx) == UNSPEC)
2910 switch (XINT (indx, 1))
2914 disp = gen_rtx_UNSPEC (Pmode,
2915 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2916 UNSPEC_LTREL_OFFSET);
2920 indx = XVECEXP (indx, 0, 1);
2923 case UNSPEC_LTREL_BASE:
2924 if (XVECLEN (indx, 0) == 1)
2925 indx = fake_pool_base, literal_pool = true;
2927 indx = XVECEXP (indx, 0, 1);
2934 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2937 if (REGNO (indx) == STACK_POINTER_REGNUM
2938 || REGNO (indx) == FRAME_POINTER_REGNUM
2939 || ((reload_completed || reload_in_progress)
2940 && frame_pointer_needed
2941 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2942 || REGNO (indx) == ARG_POINTER_REGNUM
2944 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2945 pointer = indx_ptr = true;
2947 if ((reload_completed || reload_in_progress)
2948 && indx == cfun->machine->base_reg)
2949 pointer = indx_ptr = literal_pool = true;
2952 /* Prefer to use pointer as base, not index. */
2953 if (base && indx && !base_ptr
2954 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2961 /* Validate displacement. */
2964 /* If virtual registers are involved, the displacement will change later
2965 anyway as the virtual registers get eliminated. This could make a
2966 valid displacement invalid, but it is more likely to make an invalid
2967 displacement valid, because we sometimes access the register save area
2968 via negative offsets to one of those registers.
2969 Thus we don't check the displacement for validity here. If after
2970 elimination the displacement turns out to be invalid after all,
2971 this is fixed up by reload in any case. */
2972 /* LRA always keeps displacements up to date and we need to
2973 know the displacement is valid during all of LRA, not only at the
2974 final elimination. */
2976 || (base != arg_pointer_rtx
2977 && indx != arg_pointer_rtx
2978 && base != return_address_pointer_rtx
2979 && indx != return_address_pointer_rtx
2980 && base != frame_pointer_rtx
2981 && indx != frame_pointer_rtx
2982 && base != virtual_stack_vars_rtx
2983 && indx != virtual_stack_vars_rtx))
2984 if (!DISP_IN_RANGE (offset))
2989 /* All the special cases are pointers. */
2992 /* In the small-PIC case, the linker converts @GOT
2993 and @GOTNTPOFF offsets to possible displacements. */
2994 if (GET_CODE (disp) == UNSPEC
2995 && (XINT (disp, 1) == UNSPEC_GOT
2996 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3002 /* Accept pool label offsets. */
3003 else if (GET_CODE (disp) == UNSPEC
3004 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3007 /* Accept literal pool references. */
3008 else if (GET_CODE (disp) == UNSPEC
3009 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3011 /* In case CSE pulled a non-literal-pool reference out of
3012 the pool we have to reject the address. This is
3013 especially important when loading the GOT pointer on non
3014 zarch CPUs. In this case the literal pool contains an lt
3015 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3016 will most likely exceed the displacement. */
3017 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3018 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3021 orig_disp = gen_rtx_CONST (Pmode, disp);
3024 /* If we have an offset, make sure it does not
3025 exceed the size of the constant pool entry. */
3026 rtx sym = XVECEXP (disp, 0, 0);
3027 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3030 orig_disp = plus_constant (Pmode, orig_disp, offset);
3045 out->disp = orig_disp;
3046 out->pointer = pointer;
3047 out->literal_pool = literal_pool;
3053 /* Decompose a RTL expression OP for an address style operand into its
3054 components, and return the base register in BASE and the offset in
3055 OFFSET. While OP looks like an address it is never supposed to be
3058 Return true if OP is a valid address operand, false if not. */
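/* For illustration: (plus (reg %r3) (const_int 7)) yields *BASE == %r3
   and *OFFSET == 7, while a plain (const_int 42) yields
   *BASE == NULL_RTX with *OFFSET == 42.  */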
3061 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3062 HOST_WIDE_INT *offset)
3066 /* We can have an integer constant, an address register,
3067 or a sum of the two. */
3068 if (CONST_SCALAR_INT_P (op))
3073 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3078 while (op && GET_CODE (op) == SUBREG)
3079 op = SUBREG_REG (op);
3081 if (op && GET_CODE (op) != REG)
3086 if (off == NULL_RTX)
3088 else if (CONST_INT_P (off))
3089 *offset = INTVAL (off);
3090 else if (CONST_WIDE_INT_P (off))
3091 /* The offset will be cut down to 12 bits anyway, so just take
3092 the lowest-order chunk of the wide int. */
3093 *offset = CONST_WIDE_INT_ELT (off, 0);
3104 /* Return true if CODE is a valid address without index. */
3107 s390_legitimate_address_without_index_p (rtx op)
3109 struct s390_address addr;
3111 if (!s390_decompose_address (XEXP (op, 0), &addr))
3120 /* Return TRUE if ADDR is an operand valid for a load/store relative
3121 instruction. Be aware that the alignment of the operand needs to
3122 be checked separately.
3123 Valid addresses are single references or a sum of a reference and a
3124 constant integer. Return these parts in SYMREF and ADDEND. You can
3125 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3126 values. Literal pool references are *not* considered symbol references. */
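/* For illustration: (const (plus (symbol_ref "x") (const_int 8))) is
   accepted with *SYMREF == (symbol_ref "x") and *ADDEND == 8, whereas a
   SYMBOL_REF with CONSTANT_POOL_ADDRESS_P set is rejected.  */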
3130 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3132 HOST_WIDE_INT tmpaddend = 0;
3134 if (GET_CODE (addr) == CONST)
3135 addr = XEXP (addr, 0);
3137 if (GET_CODE (addr) == PLUS)
3139 if (!CONST_INT_P (XEXP (addr, 1)))
3142 tmpaddend = INTVAL (XEXP (addr, 1));
3143 addr = XEXP (addr, 0);
3146 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3147 || (GET_CODE (addr) == UNSPEC
3148 && (XINT (addr, 1) == UNSPEC_GOTENT
3149 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3154 *addend = tmpaddend;
3161 /* Return true if the address in OP is valid for constraint letter C
3162 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3163 pool MEMs should be accepted. Only the Q, R, S, T constraint
3164 letters are allowed for C. */
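/* In short: 'Q' and 'R' require a short displacement ('Q' without, 'R'
   with an index register), while 'S' and 'T' allow the long
   displacement forms ('S' without, 'T' with an index register).  */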
3167 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3169 struct s390_address addr;
3170 bool decomposed = false;
3172 /* This check makes sure that no symbolic addresses (except literal
3173 pool references) are accepted by the R or T constraints. */
3174 if (s390_loadrelative_operand_p (op, NULL, NULL))
3177 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3180 if (!s390_decompose_address (op, &addr))
3182 if (addr.literal_pool)
3187 /* With reload, we sometimes get intermediate address forms that are
3188 actually invalid as-is, but we need to accept them in the most
3189 generic cases below ('R' or 'T'), since reload will in fact fix
3190 them up. LRA behaves differently here; we never see such forms,
3191 but on the other hand, we need to strictly reject every invalid
3192 address form. Perform this check right up front. */
3193 if (lra_in_progress)
3195 if (!decomposed && !s390_decompose_address (op, &addr))
3202 case 'Q': /* no index short displacement */
3203 if (!decomposed && !s390_decompose_address (op, &addr))
3207 if (!s390_short_displacement (addr.disp))
3211 case 'R': /* with index short displacement */
3212 if (TARGET_LONG_DISPLACEMENT)
3214 if (!decomposed && !s390_decompose_address (op, &addr))
3216 if (!s390_short_displacement (addr.disp))
3219 /* Any invalid address here will be fixed up by reload,
3220 so accept it for the most generic constraint. */
3223 case 'S': /* no index long displacement */
3224 if (!decomposed && !s390_decompose_address (op, &addr))
3230 case 'T': /* with index long displacement */
3231 /* Any invalid address here will be fixed up by reload,
3232 so accept it for the most generic constraint. */
3242 /* Evaluates constraint strings described by the regular expression
3243 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3244 the constraint given in STR, and 0 otherwise. */
3247 s390_mem_constraint (const char *str, rtx op)
3254 /* Check for offsettable variants of memory constraints. */
3255 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3257 if ((reload_completed || reload_in_progress)
3258 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3260 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3262 /* Check for non-literal-pool variants of memory constraints. */
3265 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3270 if (GET_CODE (op) != MEM)
3272 return s390_check_qrst_address (c, XEXP (op, 0), true);
3274 /* Simply check for the basic form of a shift count. Reload will
3275 take care of making sure we have a proper base register. */
3276 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3280 return s390_check_qrst_address (str[1], op, true);
3288 /* Evaluates constraint strings starting with letter O. Input
3289 parameter C is the second letter following the "O" in the constraint
3290 string. Returns 1 if VALUE meets the respective constraint and 0 otherwise. */
3294 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3302 return trunc_int_for_mode (value, SImode) == value;
3306 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3309 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3317 /* Evaluates constraint strings starting with letter N. Parameter STR
3318 contains the letters following letter "N" in the constraint string.
3319 Returns true if VALUE matches the constraint. */
3322 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3324 machine_mode mode, part_mode;
3326 int part, part_goal;
3332 part_goal = str[0] - '0';
3376 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3379 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3382 if (part_goal != -1 && part_goal != part)
3389 /* Returns true if the input parameter VALUE is a float zero. */
3392 s390_float_const_zero_p (rtx value)
3394 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3395 && value == CONST0_RTX (GET_MODE (value)));
3398 /* Implement TARGET_REGISTER_MOVE_COST. */
3401 s390_register_move_cost (machine_mode mode,
3402 reg_class_t from, reg_class_t to)
3404 /* On s390, copying between FPRs and GPRs is expensive. */
3406 /* It becomes somewhat faster having ldgr/lgdr. */
3407 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3409 /* ldgr is single cycle. */
3410 if (reg_classes_intersect_p (from, GENERAL_REGS)
3411 && reg_classes_intersect_p (to, FP_REGS))
3413 /* lgdr needs 3 cycles. */
3414 if (reg_classes_intersect_p (to, GENERAL_REGS)
3415 && reg_classes_intersect_p (from, FP_REGS))
3419 /* Otherwise copying is done via memory. */
3420 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3421 && reg_classes_intersect_p (to, FP_REGS))
3422 || (reg_classes_intersect_p (from, FP_REGS)
3423 && reg_classes_intersect_p (to, GENERAL_REGS)))
3429 /* Implement TARGET_MEMORY_MOVE_COST. */
3432 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3433 reg_class_t rclass ATTRIBUTE_UNUSED,
3434 bool in ATTRIBUTE_UNUSED)
3439 /* Compute a (partial) cost for rtx X. Return true if the complete
3440 cost has been computed, and false if subexpressions should be
3441 scanned. In either case, *TOTAL contains the cost result. The
3442 initial value of *TOTAL is the default value computed by
3443 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3444 code of the superexpression of x. */
3447 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3448 int opno ATTRIBUTE_UNUSED,
3449 int *total, bool speed ATTRIBUTE_UNUSED)
3451 int code = GET_CODE (x);
3459 case CONST_WIDE_INT:
3466 if (GET_CODE (XEXP (x, 0)) == AND
3467 && GET_CODE (XEXP (x, 1)) == ASHIFT
3468 && REG_P (XEXP (XEXP (x, 0), 0))
3469 && REG_P (XEXP (XEXP (x, 1), 0))
3470 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3471 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3472 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3473 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3475 *total = COSTS_N_INSNS (2);
3488 *total = COSTS_N_INSNS (1);
3493 *total = COSTS_N_INSNS (1);
3501 rtx left = XEXP (x, 0);
3502 rtx right = XEXP (x, 1);
3503 if (GET_CODE (right) == CONST_INT
3504 && CONST_OK_FOR_K (INTVAL (right)))
3505 *total = s390_cost->mhi;
3506 else if (GET_CODE (left) == SIGN_EXTEND)
3507 *total = s390_cost->mh;
3509 *total = s390_cost->ms; /* msr, ms, msy */
3514 rtx left = XEXP (x, 0);
3515 rtx right = XEXP (x, 1);
3518 if (GET_CODE (right) == CONST_INT
3519 && CONST_OK_FOR_K (INTVAL (right)))
3520 *total = s390_cost->mghi;
3521 else if (GET_CODE (left) == SIGN_EXTEND)
3522 *total = s390_cost->msgf;
3524 *total = s390_cost->msg; /* msgr, msg */
3526 else /* TARGET_31BIT */
3528 if (GET_CODE (left) == SIGN_EXTEND
3529 && GET_CODE (right) == SIGN_EXTEND)
3530 /* mulsidi case: mr, m */
3531 *total = s390_cost->m;
3532 else if (GET_CODE (left) == ZERO_EXTEND
3533 && GET_CODE (right) == ZERO_EXTEND
3534 && TARGET_CPU_ZARCH)
3535 /* umulsidi case: ml, mlr */
3536 *total = s390_cost->ml;
3538 /* Complex calculation is required. */
3539 *total = COSTS_N_INSNS (40);
3545 *total = s390_cost->mult_df;
3548 *total = s390_cost->mxbr;
3559 *total = s390_cost->madbr;
3562 *total = s390_cost->maebr;
3567 /* Negate in the third argument is free: FMSUB. */
3568 if (GET_CODE (XEXP (x, 2)) == NEG)
3570 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3571 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3572 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3579 if (mode == TImode) /* 128 bit division */
3580 *total = s390_cost->dlgr;
3581 else if (mode == DImode)
3583 rtx right = XEXP (x, 1);
3584 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3585 *total = s390_cost->dlr;
3586 else /* 64 by 64 bit division */
3587 *total = s390_cost->dlgr;
3589 else if (mode == SImode) /* 32 bit division */
3590 *total = s390_cost->dlr;
3597 rtx right = XEXP (x, 1);
3598 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3600 *total = s390_cost->dsgfr;
3602 *total = s390_cost->dr;
3603 else /* 64 by 64 bit division */
3604 *total = s390_cost->dsgr;
3606 else if (mode == SImode) /* 32 bit division */
3607 *total = s390_cost->dlr;
3608 else if (mode == SFmode)
3610 *total = s390_cost->debr;
3612 else if (mode == DFmode)
3614 *total = s390_cost->ddbr;
3616 else if (mode == TFmode)
3618 *total = s390_cost->dxbr;
3624 *total = s390_cost->sqebr;
3625 else if (mode == DFmode)
3626 *total = s390_cost->sqdbr;
3628 *total = s390_cost->sqxbr;
3633 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3634 || outer_code == PLUS || outer_code == MINUS
3635 || outer_code == COMPARE)
3640 *total = COSTS_N_INSNS (1);
3641 if (GET_CODE (XEXP (x, 0)) == AND
3642 && GET_CODE (XEXP (x, 1)) == CONST_INT
3643 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3645 rtx op0 = XEXP (XEXP (x, 0), 0);
3646 rtx op1 = XEXP (XEXP (x, 0), 1);
3647 rtx op2 = XEXP (x, 1);
3649 if (memory_operand (op0, GET_MODE (op0))
3650 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3652 if (register_operand (op0, GET_MODE (op0))
3653 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3663 /* Return the cost of an address rtx ADDR. */
3666 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3667 addr_space_t as ATTRIBUTE_UNUSED,
3668 bool speed ATTRIBUTE_UNUSED)
3670 struct s390_address ad;
3671 if (!s390_decompose_address (addr, &ad))
3674 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3677 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3679 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3681 int misalign ATTRIBUTE_UNUSED)
3683 switch (type_of_cost)
3693 case cond_branch_not_taken:
3695 case vec_promote_demote:
3696 case unaligned_load:
3697 case unaligned_store:
3700 case cond_branch_taken:
3704 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3711 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3712 otherwise return 0. */
3715 tls_symbolic_operand (rtx op)
3717 if (GET_CODE (op) != SYMBOL_REF)
3719 return SYMBOL_REF_TLS_MODEL (op);
3722 /* Split DImode access register reference REG (on 64-bit) into its constituent
3723 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3724 gen_highpart cannot be used as they assume all registers are word-sized,
3725 while our access registers have only half that size. */
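/* For illustration: for the DImode access register pair starting at
   register number REGNO, *HI becomes the SImode register REGNO and *LO
   the SImode register REGNO + 1, matching the big-endian ordering of
   the pair.  */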
3728 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3730 gcc_assert (TARGET_64BIT);
3731 gcc_assert (ACCESS_REG_P (reg));
3732 gcc_assert (GET_MODE (reg) == DImode);
3733 gcc_assert (!(REGNO (reg) & 1));
3735 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3736 *hi = gen_rtx_REG (SImode, REGNO (reg));
3739 /* Return true if OP contains a symbol reference. */
3742 symbolic_reference_mentioned_p (rtx op)
3747 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3750 fmt = GET_RTX_FORMAT (GET_CODE (op));
3751 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3757 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3758 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3762 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3769 /* Return true if OP contains a reference to a thread-local symbol. */
3772 tls_symbolic_reference_mentioned_p (rtx op)
3777 if (GET_CODE (op) == SYMBOL_REF)
3778 return tls_symbolic_operand (op);
3780 fmt = GET_RTX_FORMAT (GET_CODE (op));
3781 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3787 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3788 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3792 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3800 /* Return true if OP is a legitimate general operand when
3801 generating PIC code. It is given that flag_pic is on
3802 and that OP satisfies CONSTANT_P. */
3805 legitimate_pic_operand_p (rtx op)
3807 /* Accept all non-symbolic constants. */
3808 if (!SYMBOLIC_CONST (op))
3811 /* Reject everything else; must be handled
3812 via emit_symbolic_move. */
3816 /* Returns true if the constant value OP is a legitimate general operand.
3817 It is given that OP satisfies CONSTANT_P. */
3820 s390_legitimate_constant_p (machine_mode mode, rtx op)
3822 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3824 if (GET_MODE_SIZE (mode) != 16)
3827 if (!satisfies_constraint_j00 (op)
3828 && !satisfies_constraint_jm1 (op)
3829 && !satisfies_constraint_jKK (op)
3830 && !satisfies_constraint_jxx (op)
3831 && !satisfies_constraint_jyy (op))
3835 /* Accept all non-symbolic constants. */
3836 if (!SYMBOLIC_CONST (op))
3839 /* Accept immediate LARL operands. */
3840 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3843 /* Thread-local symbols are never legal constants. This is
3844 so that emit_call knows that computing such addresses
3845 might require a function call. */
3846 if (TLS_SYMBOLIC_CONST (op))
3849 /* In the PIC case, symbolic constants must *not* be
3850 forced into the literal pool. We accept them here,
3851 so that they will be handled by emit_symbolic_move. */
3855 /* All remaining non-PIC symbolic constants are
3856 forced into the literal pool. */
3860 /* Determine if it's legal to put X into the constant pool. This
3861 is not possible if X contains the address of a symbol that is
3862 not constant (TLS) or not known at final link time (PIC). */
3865 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3867 switch (GET_CODE (x))
3871 case CONST_WIDE_INT:
3873 /* Accept all non-symbolic constants. */
3877 /* Labels are OK iff we are non-PIC. */
3878 return flag_pic != 0;
3881 /* 'Naked' TLS symbol references are never OK,
3882 non-TLS symbols are OK iff we are non-PIC. */
3883 if (tls_symbolic_operand (x))
3886 return flag_pic != 0;
3889 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3892 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3893 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3896 switch (XINT (x, 1))
3898 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3899 case UNSPEC_LTREL_OFFSET:
3907 case UNSPEC_GOTNTPOFF:
3908 case UNSPEC_INDNTPOFF:
3911 /* If the literal pool shares the code section, execute template
3912 placeholders are put into the pool as well. */
3914 return TARGET_CPU_ZARCH;
3926 /* Returns true if the constant value OP is a legitimate general
3927 operand during and after reload. The difference to
3928 legitimate_constant_p is that this function will not accept
3929 a constant that would need to be forced to the literal pool
3930 before it can be used as operand.
3931 This function accepts all constants which can be loaded directly into a GPR. */
3935 legitimate_reload_constant_p (rtx op)
3937 /* Accept la(y) operands. */
3938 if (GET_CODE (op) == CONST_INT
3939 && DISP_IN_RANGE (INTVAL (op)))
3942 /* Accept l(g)hi/l(g)fi operands. */
3943 if (GET_CODE (op) == CONST_INT
3944 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3947 /* Accept lliXX operands. */
3949 && GET_CODE (op) == CONST_INT
3950 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3951 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3955 && GET_CODE (op) == CONST_INT
3956 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3957 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3960 /* Accept larl operands. */
3961 if (TARGET_CPU_ZARCH
3962 && larl_operand (op, VOIDmode))
3965 /* Accept floating-point zero operands that fit into a single GPR. */
3966 if (GET_CODE (op) == CONST_DOUBLE
3967 && s390_float_const_zero_p (op)
3968 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3971 /* Accept double-word operands that can be split. */
3972 if (GET_CODE (op) == CONST_WIDE_INT
3973 || (GET_CODE (op) == CONST_INT
3974 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3976 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3977 rtx hi = operand_subword (op, 0, 0, dword_mode);
3978 rtx lo = operand_subword (op, 1, 0, dword_mode);
3979 return legitimate_reload_constant_p (hi)
3980 && legitimate_reload_constant_p (lo);
3983 /* Everything else cannot be handled without reload. */
3987 /* Returns true if the constant value OP is a legitimate fp operand
3988 during and after reload.
3989 This function accepts all constants which can be loaded directly into an FPR. */
3993 legitimate_reload_fp_constant_p (rtx op)
3995 /* Accept floating-point zero operands if the load zero instruction
3996 can be used. Prior to z196 the load fp zero instruction caused a
3997 performance penalty if the result is used as BFP number. */
3999 && GET_CODE (op) == CONST_DOUBLE
4000 && s390_float_const_zero_p (op))
4006 /* Returns true if the constant value OP is a legitimate vector operand
4007 during and after reload.
4008 This function accepts all constants which can be loaded directly into a VR. */
4012 legitimate_reload_vector_constant_p (rtx op)
4014 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4015 && (satisfies_constraint_j00 (op)
4016 || satisfies_constraint_jm1 (op)
4017 || satisfies_constraint_jKK (op)
4018 || satisfies_constraint_jxx (op)
4019 || satisfies_constraint_jyy (op)))
4025 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4026 return the class of reg to actually use. */
4029 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4031 switch (GET_CODE (op))
4033 /* Constants we cannot reload into general registers
4034 must be forced into the literal pool. */
4038 case CONST_WIDE_INT:
4039 if (reg_class_subset_p (GENERAL_REGS, rclass)
4040 && legitimate_reload_constant_p (op))
4041 return GENERAL_REGS;
4042 else if (reg_class_subset_p (ADDR_REGS, rclass)
4043 && legitimate_reload_constant_p (op))
4045 else if (reg_class_subset_p (FP_REGS, rclass)
4046 && legitimate_reload_fp_constant_p (op))
4048 else if (reg_class_subset_p (VEC_REGS, rclass)
4049 && legitimate_reload_vector_constant_p (op))
4054 /* If a symbolic constant or a PLUS is reloaded,
4055 it is most likely being used as an address, so
4056 prefer ADDR_REGS. If 'class' is not a superset
4057 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4059 /* Symrefs cannot be pushed into the literal pool with -fPIC
4060 so we *MUST NOT* return NO_REGS for these cases
4061 (s390_cannot_force_const_mem will return true).
4063 On the other hand we MUST return NO_REGS for symrefs with
4064 invalid addend which might have been pushed to the literal
4065 pool (no -fPIC). Usually we would expect them to be
4066 handled via secondary reload but this does not happen if
4067 they are used as literal pool slot replacement in reload
4068 inheritance (see emit_input_reload_insns). */
4069 if (TARGET_CPU_ZARCH
4070 && GET_CODE (XEXP (op, 0)) == PLUS
4071 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4072 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4074 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4082 if (!legitimate_reload_constant_p (op))
4086 /* load address will be used. */
4087 if (reg_class_subset_p (ADDR_REGS, rclass))
4099 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4100 multiple of ALIGNMENT and the SYMBOL_REF being naturally aligned. Otherwise return false. */
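/* For illustration: with ALIGNMENT == 4 the addend must be a multiple
   of 4 and, for a SYMBOL_REF, the symbol must not carry the
   SYMBOL_FLAG_NOTALIGN4 flag; ALIGNMENT values outside the 2..8 range
   are rejected up front.  */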
4104 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4106 HOST_WIDE_INT addend;
4109 /* The "required alignment" might be 0 (e.g. for certain structs
4110 accessed via BLKmode). Early abort in this case, as well as when
4111 an alignment > 8 is required. */
4112 if (alignment < 2 || alignment > 8)
4115 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4118 if (addend & (alignment - 1))
4121 if (GET_CODE (symref) == SYMBOL_REF)
4123 /* We have load-relative instructions for 2-byte, 4-byte, and
4124 8-byte alignment so allow only these. */
4127 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4128 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4129 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4130 default: return false;
4134 if (GET_CODE (symref) == UNSPEC
4135 && alignment <= UNITS_PER_LONG)
4141 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4142 operand SCRATCH is used to reload the even part of the address; the remaining odd offset of one is then added with la. */
4146 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4148 HOST_WIDE_INT addend;
4151 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4155 /* Easy case. The addend is even so larl will do fine. */
4156 emit_move_insn (reg, addr);
4159 /* We can leave the scratch register untouched if the target
4160 register is a valid base register. */
4161 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4162 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4165 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4166 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4169 emit_move_insn (scratch,
4170 gen_rtx_CONST (Pmode,
4171 gen_rtx_PLUS (Pmode, symref,
4172 GEN_INT (addend - 1))));
4174 emit_move_insn (scratch, symref);
4176 /* Increment the address using la in order to avoid clobbering cc. */
4177 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4181 /* Generate what is necessary to move between REG and MEM using
4182 SCRATCH. The direction is given by TOMEM. */
4185 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4187 /* Reload might have pulled a constant out of the literal pool.
4188 Force it back in. */
4189 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4190 || GET_CODE (mem) == CONST_WIDE_INT
4191 || GET_CODE (mem) == CONST_VECTOR
4192 || GET_CODE (mem) == CONST)
4193 mem = force_const_mem (GET_MODE (reg), mem);
4195 gcc_assert (MEM_P (mem));
4197 /* For a load from memory we can leave the scratch register
4198 untouched if the target register is a valid base register. */
4200 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4201 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4202 && GET_MODE (reg) == GET_MODE (scratch))
4205 /* Load address into scratch register. Since we can't have a
4206 secondary reload for a secondary reload we have to cover the case
4207 where larl would need a secondary reload here as well. */
4208 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4210 /* Now we can use a standard load/store to do the move. */
4212 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4214 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4217 /* Inform reload about cases where moving X with a mode MODE to a register in
4218 RCLASS requires an extra scratch or immediate register. Return the class
4219 needed for the immediate register. */
4222 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4223 machine_mode mode, secondary_reload_info *sri)
4225 enum reg_class rclass = (enum reg_class) rclass_i;
4227 /* Intermediate register needed. */
4228 if (reg_classes_intersect_p (CC_REGS, rclass))
4229 return GENERAL_REGS;
4233 /* The vst/vl vector move instructions allow only for short displacements. */
4236 && GET_CODE (XEXP (x, 0)) == PLUS
4237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4238 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4239 && reg_class_subset_p (rclass, VEC_REGS)
4240 && (!reg_class_subset_p (rclass, FP_REGS)
4241 || (GET_MODE_SIZE (mode) > 8
4242 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4245 sri->icode = (TARGET_64BIT ?
4246 CODE_FOR_reloaddi_la_in :
4247 CODE_FOR_reloadsi_la_in);
4249 sri->icode = (TARGET_64BIT ?
4250 CODE_FOR_reloaddi_la_out :
4251 CODE_FOR_reloadsi_la_out);
4257 HOST_WIDE_INT offset;
4260 /* On z10 several optimizer steps may generate larl operands with an odd addend. */
4263 && s390_loadrelative_operand_p (x, &symref, &offset)
4265 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4266 && (offset & 1) == 1)
4267 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4268 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4270 /* Handle all the (mem (symref)) accesses we cannot use the z10
4271 instructions for. */
4273 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4275 || !reg_class_subset_p (rclass, GENERAL_REGS)
4276 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4277 || !s390_check_symref_alignment (XEXP (x, 0),
4278 GET_MODE_SIZE (mode))))
4280 #define __SECONDARY_RELOAD_CASE(M,m) \
4283 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4284 CODE_FOR_reload##m##di_tomem_z10; \
4286 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4287 CODE_FOR_reload##m##si_tomem_z10; \
4290 switch (GET_MODE (x))
4292 __SECONDARY_RELOAD_CASE (QI, qi);
4293 __SECONDARY_RELOAD_CASE (HI, hi);
4294 __SECONDARY_RELOAD_CASE (SI, si);
4295 __SECONDARY_RELOAD_CASE (DI, di);
4296 __SECONDARY_RELOAD_CASE (TI, ti);
4297 __SECONDARY_RELOAD_CASE (SF, sf);
4298 __SECONDARY_RELOAD_CASE (DF, df);
4299 __SECONDARY_RELOAD_CASE (TF, tf);
4300 __SECONDARY_RELOAD_CASE (SD, sd);
4301 __SECONDARY_RELOAD_CASE (DD, dd);
4302 __SECONDARY_RELOAD_CASE (TD, td);
4303 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4304 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4305 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4306 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4307 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4308 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4309 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4310 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4311 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4312 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4313 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4314 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4315 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4316 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4317 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4318 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4319 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4320 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4321 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4322 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4323 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4327 #undef __SECONDARY_RELOAD_CASE
4331 /* We need a scratch register when loading a PLUS expression which
4332 is not a legitimate operand of the LOAD ADDRESS instruction. */
4333 /* LRA can deal with transformation of plus op very well -- so we
4334 don't need to prompt LRA in this case. */
4335 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4336 sri->icode = (TARGET_64BIT ?
4337 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4339 /* When performing a multiword move from or to memory we have to make sure the
4340 second chunk in memory is addressable without causing a displacement
4341 overflow. If that would be the case we calculate the address in
4342 a scratch register. */
4344 && GET_CODE (XEXP (x, 0)) == PLUS
4345 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4346 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4347 + GET_MODE_SIZE (mode) - 1))
4349 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4350 in an s_operand address since we may fall back to lm/stm. So we only
4351 have to care about overflows in the b+i+d case. */
4352 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4353 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4355 /* For FP_REGS no lm/stm is available so this check is triggered
4356 for displacement overflows in b+i+d and b+d like addresses. */
4357 || (reg_classes_intersect_p (FP_REGS, rclass)
4358 && s390_class_max_nregs (FP_REGS, mode) > 1))
4361 sri->icode = (TARGET_64BIT ?
4362 CODE_FOR_reloaddi_la_in :
4363 CODE_FOR_reloadsi_la_in);
4365 sri->icode = (TARGET_64BIT ?
4366 CODE_FOR_reloaddi_la_out :
4367 CODE_FOR_reloadsi_la_out);
4371 /* A scratch address register is needed when a symbolic constant is
4372 copied to r0 when compiling with -fPIC. In other cases the target
4373 register might be used as temporary (see legitimize_pic_address). */
4374 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4375 sri->icode = (TARGET_64BIT ?
4376 CODE_FOR_reloaddi_PIC_addr :
4377 CODE_FOR_reloadsi_PIC_addr);
4379 /* Either scratch or no register needed. */
4383 /* Generate code to load SRC, which is PLUS that is not a
4384 legitimate operand for the LA instruction, into TARGET.
4385 SCRATCH may be used as scratch register. */
4388 s390_expand_plus_operand (rtx target, rtx src,
4392 struct s390_address ad;
4394 /* src must be a PLUS; get its two operands. */
4395 gcc_assert (GET_CODE (src) == PLUS);
4396 gcc_assert (GET_MODE (src) == Pmode);
4398 /* Check if any of the two operands is already scheduled
4399 for replacement by reload. This can happen e.g. when
4400 float registers occur in an address. */
4401 sum1 = find_replacement (&XEXP (src, 0));
4402 sum2 = find_replacement (&XEXP (src, 1));
4403 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4405 /* If the address is already strictly valid, there's nothing to do. */
4406 if (!s390_decompose_address (src, &ad)
4407 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4408 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4410 /* Otherwise, one of the operands cannot be an address register;
4411 we reload its value into the scratch register. */
4412 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4414 emit_move_insn (scratch, sum1);
4417 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4419 emit_move_insn (scratch, sum2);
4423 /* According to the way these invalid addresses are generated
4424 in reload.c, it should never happen (at least on s390) that
4425 *neither* of the PLUS components, after find_replacements
4426 was applied, is an address register. */
4427 if (sum1 == scratch && sum2 == scratch)
4433 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4436 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4437 is only ever performed on addresses, so we can mark the
4438 sum as legitimate for LA in any case. */
4439 s390_load_address (target, src);
4443 /* Return true if ADDR is a valid memory address.
4444 STRICT specifies whether strict register checking applies. */
4447 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4449 struct s390_address ad;
4452 && larl_operand (addr, VOIDmode)
4453 && (mode == VOIDmode
4454 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4457 if (!s390_decompose_address (addr, &ad))
4462 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4465 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4471 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4472 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4476 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4477 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4483 /* Return true if OP is a valid operand for the LA instruction.
4484 In 31-bit, we need to prove that the result is used as an
4485 address, as LA performs only a 31-bit addition. */
4488 legitimate_la_operand_p (rtx op)
4490 struct s390_address addr;
4491 if (!s390_decompose_address (op, &addr))
4494 return (TARGET_64BIT || addr.pointer);
4497 /* Return true if it is valid *and* preferable to use LA to
4498 compute the sum of OP1 and OP2. */
4501 preferred_la_operand_p (rtx op1, rtx op2)
4503 struct s390_address addr;
4505 if (op2 != const0_rtx)
4506 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4508 if (!s390_decompose_address (op1, &addr))
4510 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4512 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4515 /* Avoid LA instructions with index register on z196; it is
4516 preferable to use regular add instructions when possible.
4517 Starting with zEC12 the la with index register is "uncracked" again. */
4519 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4522 if (!TARGET_64BIT && !addr.pointer)
4528 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4529 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4535 /* Emit a forced load-address operation to load SRC into DST.
4536 This will use the LOAD ADDRESS instruction even in situations
4537 where legitimate_la_operand_p (SRC) returns false. */
4540 s390_load_address (rtx dst, rtx src)
4543 emit_move_insn (dst, src);
4545 emit_insn (gen_force_la_31 (dst, src));
4548 /* Return a legitimate reference for ORIG (an address) using the
4549 register REG. If REG is 0, a new pseudo is generated.
4551 There are two types of references that must be handled:
4553 1. Global data references must load the address from the GOT, via
4554 the PIC reg. An insn is emitted to do this load, and the reg is
4557 2. Static data references, constant pool addresses, and code labels
4558 compute the address as an offset from the GOT, whose base is in
4559 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4560 differentiate them from global data objects. The returned
4561 address is the PIC reg + an unspec constant.
4563 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4564 reg also appears in the address. */
4567 legitimize_pic_address (rtx orig, rtx reg)
4570 rtx addend = const0_rtx;
4573 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4575 if (GET_CODE (addr) == CONST)
4576 addr = XEXP (addr, 0);
4578 if (GET_CODE (addr) == PLUS)
4580 addend = XEXP (addr, 1);
4581 addr = XEXP (addr, 0);
4584 if ((GET_CODE (addr) == LABEL_REF
4585 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4586 || (GET_CODE (addr) == UNSPEC &&
4587 (XINT (addr, 1) == UNSPEC_GOTENT
4588 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4589 && GET_CODE (addend) == CONST_INT)
4591 /* This can be locally addressed. */
4593 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4594 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4595 gen_rtx_CONST (Pmode, addr) : addr);
4597 if (TARGET_CPU_ZARCH
4598 && larl_operand (const_addr, VOIDmode)
4599 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4600 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4602 if (INTVAL (addend) & 1)
4604 /* LARL can't handle odd offsets, so emit a pair of LARL and LA. */
4606 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4608 if (!DISP_IN_RANGE (INTVAL (addend)))
4610 HOST_WIDE_INT even = INTVAL (addend) - 1;
4611 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4612 addr = gen_rtx_CONST (Pmode, addr);
4613 addend = const1_rtx;
4616 emit_move_insn (temp, addr);
4617 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4621 s390_load_address (reg, new_rtx);
4627 /* If the offset is even, we can just use LARL. This
4628 will happen automatically. */
4633 /* No larl - Access local symbols relative to the GOT. */
4635 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4637 if (reload_in_progress || reload_completed)
4638 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4640 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4641 if (addend != const0_rtx)
4642 addr = gen_rtx_PLUS (Pmode, addr, addend);
4643 addr = gen_rtx_CONST (Pmode, addr);
4644 addr = force_const_mem (Pmode, addr);
4645 emit_move_insn (temp, addr);
4647 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4650 s390_load_address (reg, new_rtx);
4655 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4657 /* A non-local symbol reference without addend.
4659 The symbol ref is wrapped into an UNSPEC to make sure the
4660 proper operand modifier (@GOT or @GOTENT) will be emitted.
4661 This will tell the linker to put the symbol into the GOT.
4663 Additionally the code dereferencing the GOT slot is emitted here.
4665 An addend to the symref needs to be added afterwards.
4666 legitimize_pic_address calls itself recursively to handle
4667 that case. So no need to do it here. */
4670 reg = gen_reg_rtx (Pmode);
4674 /* Use load relative if possible.
4675 lgrl <target>, sym@GOTENT */
4676 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4677 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4678 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4680 emit_move_insn (reg, new_rtx);
4683 else if (flag_pic == 1)
4685 /* Assume GOT offset is a valid displacement operand (< 4k
4686 or < 512k with z990). This is handled the same way in
4687 both 31- and 64-bit code (@GOT).
4688 lg <target>, sym@GOT(r12) */
4690 if (reload_in_progress || reload_completed)
4691 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4693 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4694 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4695 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4696 new_rtx = gen_const_mem (Pmode, new_rtx);
4697 emit_move_insn (reg, new_rtx);
4700 else if (TARGET_CPU_ZARCH)
4702 /* If the GOT offset might be >= 4k, we determine the position
4703 of the GOT entry via a PC-relative LARL (@GOTENT).
4704 larl temp, sym@GOTENT
4705 lg <target>, 0(temp) */
4707 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4709 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4710 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4712 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4713 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4714 emit_move_insn (temp, new_rtx);
4716 new_rtx = gen_const_mem (Pmode, temp);
4717 emit_move_insn (reg, new_rtx);
4723 /* If the GOT offset might be >= 4k, we have to load it
4724 from the literal pool (@GOT).
4726 lg temp, lit-litbase(r13)
4727 lg <target>, 0(temp)
4728 lit: .long sym@GOT */
4730 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4732 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4733 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4735 if (reload_in_progress || reload_completed)
4736 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4738 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4739 addr = gen_rtx_CONST (Pmode, addr);
4740 addr = force_const_mem (Pmode, addr);
4741 emit_move_insn (temp, addr);
4743 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4744 new_rtx = gen_const_mem (Pmode, new_rtx);
4745 emit_move_insn (reg, new_rtx);
4749 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4751 gcc_assert (XVECLEN (addr, 0) == 1);
4752 switch (XINT (addr, 1))
4754 /* These address symbols (or PLT slots) relative to the GOT
4755 (not GOT slots!). In general this will exceed the
4756 displacement range so these values belong into the literal pool. */
4760 new_rtx = force_const_mem (Pmode, orig);
4763 /* For -fPIC the GOT size might exceed the displacement
4764 range so make sure the value is in the literal pool. */
4767 new_rtx = force_const_mem (Pmode, orig);
4770 /* For @GOTENT larl is used. This is handled like local
4776 /* @PLT is OK as is on 64-bit, must be converted to
4777 GOT-relative @PLTOFF on 31-bit. */
4779 if (!TARGET_CPU_ZARCH)
4781 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4783 if (reload_in_progress || reload_completed)
4784 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4786 addr = XVECEXP (addr, 0, 0);
4787 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4789 if (addend != const0_rtx)
4790 addr = gen_rtx_PLUS (Pmode, addr, addend);
4791 addr = gen_rtx_CONST (Pmode, addr);
4792 addr = force_const_mem (Pmode, addr);
4793 emit_move_insn (temp, addr);
4795 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4798 s390_load_address (reg, new_rtx);
4803 /* On 64 bit larl can be used. This case is handled like
4804 local symbol refs. */
4808 /* Everything else cannot happen. */
      else if (addend != const0_rtx)
	{
	  /* Otherwise, compute the sum.  */

	  rtx base = legitimize_pic_address (addr, reg);
	  new_rtx = legitimize_pic_address (addend,
					    base == reg ? NULL_RTX : reg);
	  if (GET_CODE (new_rtx) == CONST_INT)
	    new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
	  else
	    {
	      if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
		{
		  base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		  new_rtx = XEXP (new_rtx, 1);
		}
	      new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);

	      if (GET_CODE (new_rtx) == CONST)
		new_rtx = XEXP (new_rtx, 0);
	      new_rtx = force_operand (new_rtx, 0);
	    }
	}
    }
  return new_rtx;
}
/* Load the thread pointer into a register.  */

static rtx
s390_get_thread_pointer (void)
{
  rtx tp = gen_reg_rtx (Pmode);

  emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
  mark_reg_pointer (tp, BITS_PER_WORD);

  return tp;
}
/* Emit a tls call insn.  The call target is the SYMBOL_REF stored
   in s390_tls_symbol which always refers to __tls_get_offset.
   The returned offset is written to RESULT_REG and a USE rtx is
   generated for TLS_CALL.  */

static GTY(()) rtx s390_tls_symbol;

static void
s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
{
  rtx insn;

  if (!flag_pic)
    emit_insn (s390_load_got ());

  if (!s390_tls_symbol)
    s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");

  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));

  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
  RTL_CONST_CALL_P (insn) = 1;
}
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  REG may be used as temporary.  */
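/* Editorial overview (summarizes the cases handled below):
   - global-dynamic: call __tls_get_offset and add the thread pointer;
   - local-dynamic: one __tls_get_offset call yields the module base,
     plus a DTPOFF offset per symbol;
   - initial-exec: load the NTPOFF offset from the GOT and add the
     thread pointer;
   - local-exec: the NTPOFF offset is a link-time constant taken from
     the literal pool.  */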
static rtx
legitimize_tls_address (rtx addr, rtx reg)
{
  rtx new_rtx, tls_call, temp, base, r2;
  rtx_insn *insn;

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (tls_symbolic_operand (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;
      case TLS_MODEL_LOCAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	base = gen_reg_rtx (Pmode);
	s390_load_address (base, new_rtx);

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;
      case TLS_MODEL_INITIAL_EXEC:
	if (flag_pic == 1)
	  {
	    /* Assume GOT offset < 4k.  This is handled the same way
	       in both 31- and 64-bit code.  */

	    if (reload_in_progress || reload_completed)
	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	    new_rtx = gen_const_mem (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }
	else if (TARGET_CPU_ZARCH)
	  {
	    /* If the GOT offset might be >= 4k, we determine the position
	       of the GOT entry via a PC-relative LARL.  */

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);

	    new_rtx = gen_const_mem (Pmode, temp);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }
	else if (flag_pic)
	  {
	    /* If the GOT offset might be >= 4k, we have to load it
	       from the literal pool.  */

	    if (reload_in_progress || reload_completed)
	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = force_const_mem (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);

	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
	    new_rtx = gen_const_mem (Pmode, new_rtx);

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
	    temp = gen_reg_rtx (Pmode);
	    emit_insn (gen_rtx_SET (temp, new_rtx));
	  }
	else
	  {
	    /* In position-dependent code, load the absolute address of
	       the GOT entry from the literal pool.  */

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = force_const_mem (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);

	    new_rtx = temp;
	    new_rtx = gen_const_mem (Pmode, new_rtx);
	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
	    temp = gen_reg_rtx (Pmode);
	    emit_insn (gen_rtx_SET (temp, new_rtx));
	  }

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;
      case TLS_MODEL_LOCAL_EXEC:
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      default:
	gcc_unreachable ();
      }
  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
    {
      switch (XINT (XEXP (addr, 0), 1))
	{
	case UNSPEC_INDNTPOFF:
	  gcc_assert (TARGET_CPU_ZARCH);
	  new_rtx = addr;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
    {
      new_rtx = XEXP (XEXP (addr, 0), 0);
      if (GET_CODE (new_rtx) != SYMBOL_REF)
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      new_rtx = legitimize_tls_address (new_rtx, reg);
      new_rtx = plus_constant (Pmode, new_rtx,
			       INTVAL (XEXP (XEXP (addr, 0), 1)));
      new_rtx = force_operand (new_rtx, 0);
    }
  else
    gcc_unreachable ();  /* for now ... */

  return new_rtx;
}
/* Emit insns making the address in operands[1] valid for a standard
   move to operands[0].  operands[1] is replaced by an address which
   should be used instead of the former RTX to emit the move
   pattern.  */

void
emit_symbolic_move (rtx *operands)
{
  rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);

  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (Pmode, operands[1]);
  else if (TLS_SYMBOLIC_CONST (operands[1]))
    operands[1] = legitimize_tls_address (operands[1], temp);
  else if (flag_pic)
    operands[1] = legitimize_pic_address (operands[1], temp);
}
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address for details.  */

static rtx
s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx constant_term = const0_rtx;

  if (TLS_SYMBOLIC_CONST (x))
    {
      x = legitimize_tls_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }
  else if (GET_CODE (x) == PLUS
	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
    {
      return x;
    }
  else if (flag_pic)
    {
      if (SYMBOLIC_CONST (x)
	  || (GET_CODE (x) == PLUS
	      && (SYMBOLIC_CONST (XEXP (x, 0))
		  || SYMBOLIC_CONST (XEXP (x, 1)))))
	x = legitimize_pic_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }

  x = eliminate_constant_term (x, &constant_term);

  /* Optimize loading of large displacements by splitting them
     into the multiple of 4K and the rest; this allows the
     former to be CSE'd if possible.

     Don't do this if the displacement is added to a register
     pointing into the stack frame, as the offsets will
     change later anyway.  */
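  /* Worked example (editorial, illustrative): for a displacement of
     0x12345, lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^
     0x345 = 0x12000.  The 0x12000 part goes into a register (and can
     be CSE'd); 0x345 stays as an in-range displacement.  */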
  if (GET_CODE (constant_term) == CONST_INT
      && !TARGET_LONG_DISPLACEMENT
      && !DISP_IN_RANGE (INTVAL (constant_term))
      && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
    {
      HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;

      rtx temp = gen_reg_rtx (Pmode);
      rtx val = force_operand (GEN_INT (upper), temp);
      if (val != temp)
	emit_move_insn (temp, val);

      x = gen_rtx_PLUS (Pmode, x, temp);
      constant_term = GEN_INT (lower);
    }

  if (GET_CODE (x) == PLUS)
    {
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
	}
      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
	}
    }

  if (constant_term != const0_rtx)
    x = gen_rtx_PLUS (Pmode, x, constant_term);

  return x;
}
/* Try a machine-dependent way of reloading an illegitimate address AD
   operand.  If we find one, push the reload and return the new address.

   MODE is the mode of the enclosing MEM.  OPNUM is the operand number
   and TYPE is the reload type of the current reload.  */
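/* Editorial example (illustrative): for AD = (plus (reg) (const_int
   0x12345)) the code below pushes a reload for the 4K-aligned part
   0x12000 and keeps 0x345, which fits the 12-bit displacement field,
   in the address itself.  */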
rtx
legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
			   int opnum, int type)
{
  if (!optimize || TARGET_LONG_DISPLACEMENT)
    return NULL_RTX;

  if (GET_CODE (ad) == PLUS)
    {
      rtx tem = simplify_binary_operation (PLUS, Pmode,
					   XEXP (ad, 0), XEXP (ad, 1));
      if (tem)
	ad = tem;
    }

  if (GET_CODE (ad) == PLUS
      && GET_CODE (XEXP (ad, 0)) == REG
      && GET_CODE (XEXP (ad, 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
    {
      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
      rtx cst, tem, new_rtx;

      cst = GEN_INT (upper);
      if (!legitimate_reload_constant_p (cst))
	cst = force_const_mem (Pmode, cst);

      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));

      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return new_rtx;
    }

  return NULL_RTX;
}
/* Emit code to copy LEN bytes from SRC to DST.  */

void
s390_expand_movmem (rtx dst, rtx src, rtx len)
{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Inline code is generated only for constant
     lengths below 64k.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return;

  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
    {
      if (INTVAL (len) > 0)
	emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
    }
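  /* Editorial note: the SS-format instructions behind movmem_short
     (MVC et al.) encode the operand length minus one, which is why
     LEN - 1 is passed above and a single such insn can move at most
     256 bytes.  */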
  else if (TARGET_MVCLE)
    {
      emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
    }
  else
    {
      rtx dst_addr, src_addr, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      src_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);
      src = change_address (src, VOIDmode, src_addr);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
				   const0_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);

	  /* Issue a write prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
				   const1_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);
	}

      emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
      s390_load_address (src_addr,
			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_movmem_short (dst, src,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);
    }
}
/* Emit code to set LEN bytes at DST to VAL.
   Make use of clrmem if VAL is zero.  */

void
s390_expand_setmem (rtx dst, rtx len, rtx val)
{
  if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
    return;

  gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);

  if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
    {
      if (val == const0_rtx && INTVAL (len) <= 256)
	emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
      else
	{
	  /* Initialize memory by storing the first byte.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);

	  if (INTVAL (len) > 1)
	    {
	      /* Initiate 1 byte overlap move.
		 The first byte of DST is propagated through DSTP1.
		 Prepare a movmem for:  DST+1 = DST (length = LEN - 1).
		 DST is set to size 1 so the rest of the memory location
		 does not count as source operand.  */
	      rtx dstp1 = adjust_address (dst, VOIDmode, 1);
	      set_mem_size (dst, 1);

	      emit_insn (gen_movmem_short (dstp1, dst,
					   GEN_INT (INTVAL (len) - 2)));
	    }
	}
    }
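  /* Worked example (editorial, illustrative): for LEN = 4 the MVC
     emitted above copies 3 bytes from DST to DST+1.  Since MVC moves
     bytes left to right, the freshly stored first byte keeps
     propagating, and all four bytes end up holding VAL.  */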
  else if (TARGET_MVCLE)
    {
      val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
      if (TARGET_64BIT)
	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
				       val));
      else
	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
				       val));
    }
  else
    {
      rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);

      if (val == const0_rtx)
	temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			     OPTAB_DIRECT);
      else
	{
	  dstp1 = adjust_address (dst, VOIDmode, 1);
	  set_mem_size (dst, 1);

	  /* Initialize memory by storing the first byte.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);

	  /* If count is 1 we are done.  */
	  emit_cmp_and_jump_insns (count, const1_rtx,
				   EQ, NULL_RTX, mode, 1, end_label);

	  temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
			       OPTAB_DIRECT);
	}
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
	{
	  /* Issue a write prefetch for the +4 cache line.  */
	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
						     GEN_INT (1024)),
				       const1_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
      else
	emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
      else
	emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);
    }
}
/* Emit code to compare LEN bytes at OP0 with those at OP1,
   and return the result in TARGET.  */

void
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
  rtx tmp;

  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Inline code is generated only for constant
     lengths below 64k.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return;

  /* As the result of CMPINT is inverted compared to what we need,
     we have to swap the operands.  */
  tmp = op0; op0 = op1; op1 = tmp;

  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
    {
      if (INTVAL (len) > 0)
	{
	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
	  emit_insn (gen_cmpint (target, ccreg));
	}
      else
	emit_move_insn (target, const0_rtx);
    }
  else if (TARGET_MVCLE)
    {
      emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
      emit_insn (gen_cmpint (target, ccreg));
    }
  else
    {
      rtx addr0, addr1, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      addr0 = gen_reg_rtx (Pmode);
      addr1 = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
      emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
      op0 = change_address (op0, VOIDmode, addr0);
      op1 = change_address (op1, VOIDmode, addr1);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;

	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
      temp = gen_rtx_SET (pc_rtx, temp);
      emit_jump_insn (temp);

      s390_load_address (addr0,
			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
      s390_load_address (addr1,
			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cmpmem_short (op0, op1,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);

      emit_insn (gen_cmpint (target, ccreg));
    }
}
/* Emit a conditional jump to LABEL for condition code mask MASK using
   comparison operator COMPARISON.  Return the emitted jump insn.  */
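/* Editorial note: MASK is a 4-bit branch mask as used by BRC; bit
   value 8 selects CC0, 4 selects CC1, 2 selects CC2 and 1 selects
   CC3.  A mask of 8 therefore branches exactly when the condition
   code is 0.  */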
static rtx_insn *
s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
{
  rtx temp;

  gcc_assert (comparison == EQ || comparison == NE);
  gcc_assert (mask > 0 && mask < 15);

  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  temp = gen_rtx_SET (pc_rtx, temp);
  return emit_jump_insn (temp);
}
/* Emit the instructions to implement strlen of STRING and store the
   result in TARGET.  The string has the known ALIGNMENT.  This
   version uses vector instructions and is therefore not appropriate
   for targets prior to z13.  */
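/* Editorial outline of the algorithm below: if the start of the
   string is not known to be 16-byte aligned, the bytes up to the next
   16-byte boundary are first loaded with vll (which cannot cross that
   boundary and hence cannot fault on the following page) and
   searched; afterwards the code iterates over aligned 16-byte blocks
   using vfene until a zero byte is found.  */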
void
s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  int very_likely = REG_BR_PROB_BASE - 1;
  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
  rtx str_reg = gen_reg_rtx (V16QImode);
  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
  rtx str_idx_reg = gen_reg_rtx (Pmode);
  rtx result_reg = gen_reg_rtx (V16QImode);
  rtx is_aligned_label = gen_label_rtx ();
  rtx into_loop_label = NULL_RTX;
  rtx loop_start_label = gen_label_rtx ();
  rtx temp;
  rtx len = gen_reg_rtx (QImode);
  rtx cond;

  s390_load_address (str_addr_base_reg, XEXP (string, 0));
  emit_move_insn (str_idx_reg, const0_rtx);

  if (INTVAL (alignment) < 16)
    {
      /* Check whether the address happens to be aligned properly so
	 jump directly to the aligned loop.  */
      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
					    str_addr_base_reg, GEN_INT (15)),
			       const0_rtx, EQ, NULL_RTX,
			       Pmode, 1, is_aligned_label);

      temp = gen_reg_rtx (Pmode);
      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (temp));
      highest_index_to_load_reg =
	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (highest_index_to_load_reg));
      emit_insn (gen_vllv16qi (str_reg,
		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));

      into_loop_label = gen_label_rtx ();
      s390_emit_jump (into_loop_label, NULL_RTX);
      emit_barrier ();
    }

  emit_label (is_aligned_label);
  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;

  /* Reaching this point we are only performing 16 bytes aligned
     loads.  */
  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));

  emit_label (loop_start_label);
  LABEL_NUSES (loop_start_label) = 1;

  /* Load 16 bytes of the string into VR.  */
  emit_move_insn (str_reg,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, str_idx_reg,
					     str_addr_base_reg)));
  if (into_loop_label != NULL_RTX)
    {
      emit_label (into_loop_label);
      LABEL_NUSES (into_loop_label) = 1;
    }

  /* Increment string index by 16 bytes.  */
  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
		str_idx_reg, 1, OPTAB_DIRECT);

  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));

  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
		    REG_BR_PROB, very_likely);
  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));

  /* If the string pointer wasn't aligned we have loaded less than 16
     bytes and the remaining bytes got filled with zeros (by vll).
     Now we have to check whether the resulting index lies within the
     bytes actually part of the string.  */

  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
			    highest_index_to_load_reg);
  s390_load_address (highest_index_to_load_reg,
		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
				   const1_rtx));
  if (TARGET_64BIT)
    emit_insn (gen_movdicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));
  else
    emit_insn (gen_movsicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));

  add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
		    very_unlikely);

  expand_binop (Pmode, add_optab, str_idx_reg,
		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
  /* FIXME: len is already zero extended - so avoid the llgcr emitted
     by the conversion below.  */
  temp = expand_binop (Pmode, add_optab, str_idx_reg,
		       convert_to_mode (Pmode, len, 1),
		       target, 1, OPTAB_DIRECT);
  if (temp != target)
    emit_move_insn (target, temp);
}
void
s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  rtx temp = gen_reg_rtx (Pmode);
  rtx src_addr = XEXP (src, 0);
  rtx dst_addr = XEXP (dst, 0);
  rtx src_addr_reg = gen_reg_rtx (Pmode);
  rtx dst_addr_reg = gen_reg_rtx (Pmode);
  rtx offset = gen_reg_rtx (Pmode);
  rtx vsrc = gen_reg_rtx (V16QImode);
  rtx vpos = gen_reg_rtx (V16QImode);
  rtx loadlen = gen_reg_rtx (SImode);
  rtx gpos_qi = gen_reg_rtx (QImode);
  rtx gpos = gen_reg_rtx (SImode);
  rtx done_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  rtx exit_label = gen_label_rtx ();
  rtx full_label = gen_label_rtx ();

  /* Perform a quick check for a string end within the first (up to)
     16 bytes and exit early if successful.  */

  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
  /* gpos is the byte index if a zero was found and 16 otherwise.
     So if it is lower than the loaded bytes we have a hit.  */
  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
			   full_label);
  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));

  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
		      1, OPTAB_DIRECT);
  emit_jump (exit_label);

  emit_label (full_label);
  LABEL_NUSES (full_label) = 1;

  /* Calculate `offset' so that src + offset points to the last byte
     before 16 byte alignment.  */

  /* temp = src_addr & 0xf  */
  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
		      1, OPTAB_DIRECT);

  /* offset = 0xf - temp  */
  emit_move_insn (offset, GEN_INT (15));
  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
		      1, OPTAB_DIRECT);

  /* Store `offset' bytes in the destination string.  The quick check
     has loaded at least `offset' bytes into vsrc.  */

  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));

  /* Advance to the next byte to be loaded.  */
  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
		      1, OPTAB_DIRECT);

  /* Make sure the addresses are single regs which can be used as a
     base address.  */
  emit_move_insn (src_addr_reg, src_addr);
  emit_move_insn (dst_addr_reg, dst_addr);

  /* MAIN LOOP  */

  emit_label (loop_label);
  LABEL_NUSES (loop_label) = 1;

  emit_move_insn (vsrc,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));

  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
		    REG_BR_PROB, very_unlikely);

  emit_move_insn (gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
		  vsrc);

  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
		      offset, 1, OPTAB_DIRECT);

  emit_jump (loop_label);

  /* We are done.  Add the offset of the zero character to the dst_addr
     pointer to get the result.  */

  emit_label (done_label);
  LABEL_NUSES (done_label) = 1;

  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
		      1, OPTAB_DIRECT);

  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));

  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));

  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
		      1, OPTAB_DIRECT);

  emit_label (exit_label);
  LABEL_NUSES (exit_label) = 1;
}
/* Expand conditional increment or decrement using alc/slb instructions.
   Should generate code setting DST to either SRC or SRC + INCREMENT,
   depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
   Returns true if successful, false otherwise.

   That makes it possible to implement some if-constructs without jumps e.g.:
   (borrow = CC0 | CC1 and carry = CC2 | CC3)
   unsigned int a, b, c;
   if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
   if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
   if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
   if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;

   Checks for EQ and NE with a nonzero value need an additional xor e.g.:
   if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
   if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
   if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
   if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow;  */
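/* Editorial sketch (illustrative only, not part of the generated
   code) of the first idiom above at the C level:

     unsigned int a, b, c;
     ...
     c += (a < b);   // the unsigned compare sets the "carry"
		     // (CC2 | CC3); alc adds it to c

   which is exactly what the branch-free ALC sequence below computes.  */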
bool
s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
		   rtx dst, rtx src, rtx increment)
{
  machine_mode cmp_mode;
  machine_mode cc_mode;
  rtx op_res;
  rtx insn;
  rtvec p;
  bool ret;

  if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
      && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = SImode;
  else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = DImode;
  else
    return false;

  /* Try ADD LOGICAL WITH CARRY.  */
  if (increment == const1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == LTU || cmp_code == LEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	case LEU:
	  cc_mode = CCUmode;
	  break;

	case GTU:
	  cc_mode = CCL3mode;
	  break;

	default:
	  return false;
	}

      /* Emit comparison instruction pattern. */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required. */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit ALC instruction pattern. */
      op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
			       gen_rtx_REG (cc_mode, CC_REGNUM),
			       const0_rtx);

      if (src != const0_rtx)
	{
	  if (!register_operand (src, GET_MODE (dst)))
	    src = force_reg (GET_MODE (dst), src);

	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
	}

      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }
  /* Try SUBTRACT LOGICAL WITH BORROW.  */
  if (increment == constm1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == GTU || cmp_code == GEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	case LEU:
	  cc_mode = CCUmode;
	  break;

	case LTU:
	  cc_mode = CCL3mode;
	  break;

	default:
	  return false;
	}

      /* Emit comparison instruction pattern. */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required. */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit SLB instruction pattern. */
      if (!register_operand (src, GET_MODE (dst)))
	src = force_reg (GET_MODE (dst), src);

      op_res = gen_rtx_MINUS (GET_MODE (dst),
			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
					      gen_rtx_REG (cc_mode, CC_REGNUM),
					      const0_rtx));
      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }

  return false;
}
/* Expand code for the insv template.  Return true if successful.  */

bool
s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
{
  int bitsize = INTVAL (op1);
  int bitpos = INTVAL (op2);
  machine_mode mode = GET_MODE (dest);
  machine_mode smode;
  int smode_bsize, mode_bsize;
  rtx op, clobber;

  if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
    return false;

  /* Generate INSERT IMMEDIATE (IILL et al).  */
  /* (set (ze (reg)) (const_int)).  */
  if (TARGET_ZARCH
      && register_operand (dest, word_mode)
      && (bitpos % 16) == 0
      && (bitsize % 16) == 0
      && const_int_operand (src, VOIDmode))
    {
      HOST_WIDE_INT val = INTVAL (src);
      int regpos = bitpos + bitsize;

      while (regpos > bitpos)
	{
	  machine_mode putmode;
	  int putsize;

	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
	    putmode = SImode;
	  else
	    putmode = HImode;

	  putsize = GET_MODE_BITSIZE (putmode);
	  regpos -= putsize;
	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						GEN_INT (putsize),
						GEN_INT (regpos)),
			  gen_int_mode (val, putmode));
	  val >>= putsize;
	}
      gcc_assert (regpos == bitpos);
      return true;
    }

  smode = smallest_mode_for_size (bitsize, MODE_INT);
  smode_bsize = GET_MODE_BITSIZE (smode);
  mode_bsize = GET_MODE_BITSIZE (mode);
  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
  if ((bitpos % BITS_PER_UNIT) == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && MEM_P (dest)
      && (register_operand (src, word_mode)
	  || const_int_operand (src, VOIDmode)))
    {
      /* Emit standard pattern if possible.  */
      if (smode_bsize == bitsize)
	{
	  emit_move_insn (adjust_address (dest, smode, 0),
			  gen_lowpart (smode, src));
	  return true;
	}

      /* (set (ze (mem)) (const_int)).  */
      else if (const_int_operand (src, VOIDmode))
	{
	  int size = bitsize / BITS_PER_UNIT;
	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
					BLKmode,
					UNITS_PER_WORD - size);

	  dest = adjust_address (dest, BLKmode, 0);
	  set_mem_size (dest, size);
	  s390_expand_movmem (dest, src_mem, GEN_INT (size));
	  return true;
	}

      /* (set (ze (mem)) (reg)).  */
      else if (register_operand (src, word_mode))
	{
	  if (bitsize <= 32)
	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
						  const0_rtx), src);
	  else
	    {
	      /* Emit st,stcmh sequence.  */
	      int stcmh_width = bitsize - 32;
	      int size = stcmh_width / BITS_PER_UNIT;

	      emit_move_insn (adjust_address (dest, SImode, size),
			      gen_lowpart (SImode, src));
	      set_mem_size (dest, size);
	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						    GEN_INT (stcmh_width),
						    const0_rtx),
			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
	    }
	  return true;
	}

      return false;
    }
  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
  if ((bitpos % BITS_PER_UNIT) == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
      && MEM_P (src)
      && (mode == DImode || mode == SImode)
      && register_operand (dest, mode))
    {
      /* Emit a strict_low_part pattern if possible.  */
      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
	{
	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
	  return true;
	}

      /* ??? There are more powerful versions of ICM that are not
	 completely represented in the md file.  */
    }
  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
  if (TARGET_Z10 && (mode == DImode || mode == SImode))
    {
      machine_mode mode_s = GET_MODE (src);

      if (CONSTANT_P (src))
	{
	  /* For constant zero values the representation with AND
	     appears to be folded in more situations than the (set
	     (zero_extract) ...).
	     We only do this when the start and end of the bitfield
	     remain in the same SImode chunk.  That way nihf or nilf
	     can be used.
	     The AND patterns might still generate a risbg for this.  */
	  if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
	    return false;
	  else
	    src = force_reg (mode, src);
	}
      else if (mode_s != mode)
	{
	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
	  src = force_reg (mode_s, src);
	  src = gen_lowpart (mode, src);
	}

      op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
      op = gen_rtx_SET (op, src);

      if (!TARGET_ZEC12)
	{
	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
	}
      emit_insn (op);

      return true;
    }

  return false;
}
/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
   register that holds VAL of mode MODE shifted by COUNT bits.  */

static rtx
s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
{
  val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
			     NULL_RTX, 1, OPTAB_DIRECT);
  return expand_simple_binop (SImode, ASHIFT, val, count,
			      NULL_RTX, 1, OPTAB_DIRECT);
}
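/* Editorial example (illustrative): for MODE == QImode, VAL == 0x1ff
   and COUNT == 8 the returned register holds
   (0x1ff & 0xff) << 8 == 0xff00.  */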
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
   the result in TARGET.  */

static void
s390_expand_vec_compare (rtx target, enum rtx_code cond,
			 rtx cmp_op1, rtx cmp_op2)
{
  machine_mode mode = GET_MODE (target);
  bool neg_p = false, swap_p = false;
  rtx tmp;

  if (GET_MODE (cmp_op1) == V2DFmode)
    {
      switch (cond)
	{
	  /* NE a != b -> !(a == b) */
	case NE:   cond = EQ; neg_p = true;                break;
	  /* UNGT a u> b -> !(b >= a) */
	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
	  /* UNGE a u>= b -> !(b > a) */
	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
	  /* LE: a <= b -> b >= a */
	case LE:   cond = GE;               swap_p = true; break;
	  /* UNLE: a u<= b -> !(a > b) */
	case UNLE: cond = GT; neg_p = true;                break;
	  /* LT: a < b -> b > a */
	case LT:   cond = GT;               swap_p = true; break;
	  /* UNLT: a u< b -> !(a >= b) */
	case UNLT: cond = GE; neg_p = true;                break;
	case UNEQ:
	  emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
	  return;
	case LTGT:
	  emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
	  return;
	case ORDERED:
	  emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
	  return;
	case UNORDERED:
	  emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
	  return;
	default: break;
	}
    }
  else
    {
      switch (cond)
	{
	  /* NE: a != b -> !(a == b) */
	case NE:  cond = EQ;  neg_p = true;                break;
	  /* GE: a >= b -> !(b > a) */
	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
	  /* GEU: a >= b -> !(b > a) */
	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
	  /* LE: a <= b -> !(a > b) */
	case LE:  cond = GT;  neg_p = true;                break;
	  /* LEU: a <= b -> !(a > b) */
	case LEU: cond = GTU; neg_p = true;                break;
	  /* LT: a < b -> b > a */
	case LT:  cond = GT;                swap_p = true; break;
	  /* LTU: a < b -> b > a */
	case LTU: cond = GTU;               swap_p = true; break;
	default: break;
	}
    }

  if (swap_p)
    {
      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
    }

  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
						  mode,
						  cmp_op1, cmp_op2)));
  if (neg_p)
    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
}
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
   elements in CMP1 and CMP2 fulfill the comparison.
   This function is only used to emit patterns for the vx builtins and
   therefore only handles comparison codes required by those
   builtins.  */

static void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
			    rtx cmp1, rtx cmp2, bool all_p)
{
  machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
  rtx tmp_reg = gen_reg_rtx (SImode);
  bool swap_p = false;

  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	  cc_producer_mode = CCVEQmode;
	  break;
	case GE:
	case LT:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GT:
	case LE:
	  cc_producer_mode = CCVIHmode;
	  break;
	case GEU:
	case LTU:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GTU:
	case LEU:
	  cc_producer_mode = CCVIHUmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      scratch_mode = GET_MODE (cmp1);
      /* These codes represent inverted CC interpretations.  Inverting
	 an ALL CC mode results in an ANY CC mode and the other way
	 around.  Invert the all_p flag here to compensate for
	 that.  */
      if (code == NE || code == LE || code == LEU)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
    }
  else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
    {
      bool inv_p = false;

      switch (code)
	{
	case EQ:   cc_producer_mode = CCVEQmode;  break;
	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
	case GT:   cc_producer_mode = CCVFHmode;  break;
	case GE:   cc_producer_mode = CCVFHEmode; break;
	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
	default: gcc_unreachable ();
	}
      scratch_mode = mode_for_vector (
		       int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))),
		       GET_MODE_NUNITS (GET_MODE (cmp1)));
      gcc_assert (scratch_mode != BLKmode);

      if (inv_p)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
    }
  else
    gcc_unreachable ();

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
	     gen_rtvec (2, gen_rtx_SET (
			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
			gen_rtx_CLOBBER (VOIDmode,
					 gen_rtx_SCRATCH (scratch_mode)))));
  emit_move_insn (target, const0_rtx);
  emit_move_insn (tmp_reg, const1_rtx);

  emit_move_insn (target,
		  gen_rtx_IF_THEN_ELSE (SImode,
		    gen_rtx_fmt_ee (code, VOIDmode,
				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
				    const0_rtx),
		    tmp_reg,
		    target));
}
/* Invert the comparison CODE applied to a CC mode.  This is only safe
   if we know whether the result was created by a floating point
   compare or not.  For the CCV modes this is encoded as part of the
   mode.  */
enum rtx_code
s390_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares needs care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCVFALLmode || mode == CCVFANYmode)
    return reverse_condition_maybe_unordered (code);
  else if (mode == CCVIALLmode || mode == CCVIANYmode)
    return reverse_condition (code);
  else
    gcc_unreachable ();
}
/* Generate a vector comparison expression loading either elements of
   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
   and CMP_OP2.  */

static void
s390_expand_vcond (rtx target, rtx then, rtx els,
		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
{
  rtx tmp;
  machine_mode result_mode;
  rtx result_target;

  machine_mode target_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (cmp_op1);
  rtx op = (cond == LT) ? els : then;

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
     for short and byte (x >> 15 and x >> 7 respectively).  */
  if ((cond == LT || cond == GE)
      && target_mode == cmp_mode
      && cmp_op2 == CONST0_RTX (cmp_mode)
      && op == CONST0_RTX (target_mode)
      && s390_vector_mode_supported_p (target_mode)
      && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
    {
      rtx negop = (cond == LT) ? then : els;

      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;

      /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
      if (negop == CONST1_RTX (target_mode))
	{
	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 1, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}

      /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
      else if (all_ones_operand (negop, target_mode))
	{
	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 0, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}
    }
  /* We always use an integral type vector to hold the comparison
     result.  */
  result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
  result_target = gen_reg_rtx (result_mode);

  /* We allow vector immediates as comparison operands that
     can be handled by the optimization above but not by the
     following code.  Hence, force them into registers here.  */
  if (!REG_P (cmp_op1))
    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);

  if (!REG_P (cmp_op2))
    cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);

  s390_expand_vec_compare (result_target, cond,
			   cmp_op1, cmp_op2);

  /* If the results are supposed to be either -1 or 0 we are done
     since this is what our compare instructions generate anyway.  */
  if (all_ones_operand (then, GET_MODE (then))
      && const0_operand (els, GET_MODE (els)))
    {
      emit_move_insn (target, gen_rtx_SUBREG (target_mode,
					      result_target, 0));
      return;
    }

  /* Otherwise we will do a vsel afterwards.  */
  /* This gets triggered e.g.
     with gcc.c-torture/compile/pr53410-1.c */
  if (!REG_P (then))
    then = force_reg (target_mode, then);

  if (!REG_P (els))
    els = force_reg (target_mode, els);

  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
			result_target,
			CONST0_RTX (result_mode));

  /* We compared the result against zero above so we have to swap then
     and els here.  */
  tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);

  gcc_assert (target_mode == GET_MODE (then));
  emit_insn (gen_rtx_SET (target, tmp));
}
/* Emit the RTX necessary to initialize the vector TARGET with values
   in VALS.  */
void
s390_expand_vec_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true, all_regs = true, all_const_int = true;
  rtx x;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);

      if (!CONST_INT_P (x))
	all_const_int = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;

      if (!REG_P (x))
	all_regs = false;
    }

  /* Use vector gen mask or vector gen byte mask if possible.  */
  if (all_same && all_const_int
      && (XVECEXP (vals, 0, 0) == const0_rtx
	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
					       NULL, NULL)
	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
    {
      emit_insn (gen_rtx_SET (target,
			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
      return;
    }

  if (all_same)
    {
      emit_insn (gen_rtx_SET (target,
			      gen_rtx_VEC_DUPLICATE (mode,
						     XVECEXP (vals, 0, 0))));
      return;
    }

  if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
    {
      /* Use vector load pair.  */
      emit_insn (gen_rtx_SET (target,
			      gen_rtx_VEC_CONCAT (mode,
						  XVECEXP (vals, 0, 0),
						  XVECEXP (vals, 0, 1))));
      return;
    }

  /* We are about to set the vector elements one by one.  Zero out the
     full register first in order to help the data flow framework to
     detect it as a full VR set.  */
  emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));

  /* Unfortunately the vec_init expander is not allowed to fail.  So
     we have to implement the fallback ourselves.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx elem = XVECEXP (vals, 0, i);
      if (!general_operand (elem, GET_MODE (elem)))
	elem = force_reg (inner_mode, elem);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_UNSPEC (mode,
					      gen_rtvec (3, elem,
							 GEN_INT (i), target),
					      UNSPEC_VEC_SET)));
    }
}
/* Structure to hold the initial parameters for a compare_and_swap operation
   in HImode and QImode.  */

struct alignment_context
{
  rtx memsi;	  /* SI aligned memory location.  */
  rtx shift;	  /* Bit offset with regard to lsb.  */
  rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
  rtx modemaski;  /* ~modemask */
  bool aligned;	  /* True if memory is aligned, false otherwise.  */
};

/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
   structure AC for transparent simplification, if the memory alignment is
   known to be at least 32 bit.  MEM is the memory location for the actual
   operation and MODE its mode.  */
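/* Worked example (editorial, illustrative): for a QImode operand at
   address A with unknown alignment, memsi addresses A & -4, the byte
   offset is A & 3, and shift becomes (3 - (A & 3)) * 8 -- the
   big-endian bit position of the byte within the containing word.  */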
static void
init_alignment_context (struct alignment_context *ac, rtx mem,
			machine_mode mode)
{
  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));

  if (ac->aligned)
    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
  else
    {
      /* Alignment is unknown.  */
      rtx byteoffset, addr, align;

      /* Force the address into a register.  */
      addr = force_reg (Pmode, XEXP (mem, 0));

      /* Align it to SImode.  */
      align = expand_simple_binop (Pmode, AND, addr,
				   GEN_INT (-GET_MODE_SIZE (SImode)),
				   NULL_RTX, 1, OPTAB_DIRECT);
      /* Generate MEM.  */
      ac->memsi = gen_rtx_MEM (SImode, align);
      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));

      /* Calculate shiftcount.  */
      byteoffset = expand_simple_binop (Pmode, AND, addr,
					GEN_INT (GET_MODE_SIZE (SImode) - 1),
					NULL_RTX, 1, OPTAB_DIRECT);
      /* As we already have some offset, evaluate the remaining distance.  */
      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
				       NULL_RTX, 1, OPTAB_DIRECT);
    }

  /* Shift is the byte count, but we need the bitcount.  */
  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
				   NULL_RTX, 1, OPTAB_DIRECT);

  /* Calculate masks.  */
  ac->modemask = expand_simple_binop (SImode, ASHIFT,
				      GEN_INT (GET_MODE_MASK (mode)),
				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
				      NULL_RTX, 1);
}
/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
   perform the merge in SEQ2.  */

static rtx
s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
		    machine_mode mode, rtx val, rtx ins)
{
  rtx tmp;

  /* Prepare insertion.  First try a single insv.  */
  start_sequence ();
  tmp = copy_to_mode_reg (SImode, val);
  if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
			ac->shift, ins))
    {
      *seq1 = NULL;
      *seq2 = get_insns ();
      end_sequence ();
      return tmp;
    }
  end_sequence ();

  /* Failed to use insv.  Generate a two part shift and mask.  */
  start_sequence ();
  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
  *seq1 = get_insns ();
  end_sequence ();

  start_sequence ();
  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
  *seq2 = get_insns ();
  end_sequence ();

  return tmp;
}
/* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
   the memory location, CMP the old value to compare MEM with and NEW_RTX the
   value to set if CMP == MEM.  */

void
s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
		    rtx cmp, rtx new_rtx, bool is_weak)
{
  struct alignment_context ac;
  rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
  rtx res = gen_reg_rtx (SImode);
  rtx_code_label *csloop = NULL, *csend = NULL;

  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Load full word.  Subsequent loads are performed by CS.  */
  val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
			     NULL_RTX, 1, OPTAB_DIRECT);

  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
     possible, we try to use insv to make this happen efficiently.  If
     that fails we'll generate code both inside and outside the loop.  */
  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);

  if (seq0)
    emit_insn (seq0);
  if (seq1)
    emit_insn (seq1);

  /* Start CS loop.  */
  if (!is_weak)
    {
      /* Begin assuming success.  */
      emit_move_insn (btarget, const1_rtx);

      csloop = gen_label_rtx ();
      csend = gen_label_rtx ();
      emit_label (csloop);
    }

  /* val = "<mem>00..0<mem>"
   * cmp = "00..0<cmp>00..0"
   * new = "00..0<new>00..0"
   */

  emit_insn (seq2);
  emit_insn (seq3);

  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
  if (is_weak)
    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
  else
    {
      rtx tmp;

      /* Jump to end if we're done (likely?).  */
      s390_emit_jump (csend, cc);

      /* Check for changes outside mode, and loop internal if so.
	 Arrange the moves so that the compare is adjacent to the
	 branch so that we can generate CRJ.  */
      tmp = copy_to_reg (val);
      force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
			  1, OPTAB_DIRECT);
      cc = s390_emit_compare (NE, val, tmp);
      s390_emit_jump (csloop, cc);

      /* Failed.  */
      emit_move_insn (btarget, const0_rtx);
      emit_label (csend);
    }

  /* Return the correct part of the bitfield.  */
  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
					      NULL_RTX, 1, OPTAB_DIRECT), 1);
}
/* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
   and VAL the value to play with.  If AFTER is true then store the value
   MEM holds after the operation, if AFTER is false then store the value MEM
   holds before the operation.  If TARGET is zero then discard that value, else
   store it to TARGET.  */
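/* Editorial sketch of the loop built below (illustrative only):

     do
       {
	 old = *p;		// initial load; CS provides reloads
	 tmp = op (old, val);	// operate on the full SImode word
       }
     while (!compare_and_swap (p, old, tmp));  */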
void
s390_expand_atomic (machine_mode mode, enum rtx_code code,
		    rtx target, rtx mem, rtx val, bool after)
{
  struct alignment_context ac;
  rtx cmp;
  rtx new_rtx = gen_reg_rtx (SImode);
  rtx orig = gen_reg_rtx (SImode);
  rtx_code_label *csloop = gen_label_rtx ();

  gcc_assert (!target || register_operand (target, VOIDmode));
  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Shift val to the correct bit positions.
     Preserve "icm", but prevent "ex icm".  */
  if (!(ac.aligned && code == SET && MEM_P (val)))
    val = s390_expand_mask_and_shift (val, mode, ac.shift);

  /* Further preparation insns.  */
  if (code == PLUS || code == MINUS)
    emit_move_insn (orig, val);
  else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
    val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
			       NULL_RTX, 1, OPTAB_DIRECT);

  /* Load full word.  Subsequent loads are performed by CS.  */
  cmp = force_reg (SImode, ac.memsi);

  /* Start CS loop.  */
  emit_label (csloop);
  emit_move_insn (new_rtx, cmp);

  /* Patch new with val at correct position.  */
  switch (code)
    {
    case PLUS:
    case MINUS:
      val = expand_simple_binop (SImode, code, new_rtx, orig,
				 NULL_RTX, 1, OPTAB_DIRECT);
      val = expand_simple_binop (SImode, AND, val, ac.modemask,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* FALLTHRU */
    case SET:
      if (ac.aligned && MEM_P (val))
	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
			 0, 0, SImode, val, false);
      else
	{
	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
					 NULL_RTX, 1, OPTAB_DIRECT);
	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
					 NULL_RTX, 1, OPTAB_DIRECT);
	}
      break;
    case AND:
    case IOR:
    case XOR:
      new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
				     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    case MULT: /* NAND */
      new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
				     NULL_RTX, 1, OPTAB_DIRECT);
      new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
				     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    default:
      gcc_unreachable ();
    }

  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
						      ac.memsi, cmp, new_rtx));

  /* Return the correct part of the bitfield.  */
  if (target)
    convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
					       after ? new_rtx : cmp, ac.shift,
					       NULL_RTX, 1, OPTAB_DIRECT), 1);
}
6872 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6873 We need to emit DTP-relative relocations. */
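/* Editorial example: for SIZE == 4 this emits a line of the form
     .long sym@DTPOFF
   and for SIZE == 8
     .quad sym@DTPOFF
   where "sym" stands for whatever X refers to.  */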
6875 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6878 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6883 fputs ("\t.long\t", file);
6886 fputs ("\t.quad\t", file);
6891 output_addr_const (file, x);
6892 fputs ("@DTPOFF", file);
6895 /* Return the proper mode for REGNO being represented in the dwarf unwind info. */
6898 s390_dwarf_frame_reg_mode (int regno)
6900 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6902 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6903 if (GENERAL_REGNO_P (regno))
6906 /* The rightmost 64 bits of vector registers are call-clobbered. */
6907 if (GET_MODE_SIZE (save_mode) > 8)
6913 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6914 /* Implement TARGET_MANGLE_TYPE. */
6917 s390_mangle_type (const_tree type)
6919 type = TYPE_MAIN_VARIANT (type);
6921 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6922 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6925 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6926 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6927 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6928 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6930 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6931 && TARGET_LONG_DOUBLE_128)
6934 /* For all other types, use normal C++ mangling. */
6939 /* In the name of slightly smaller debug output, and to cater to
6940 general assembler lossage, recognize various UNSPEC sequences
6941 and turn them back into a direct symbol reference. */
6944 s390_delegitimize_address (rtx orig_x)
6948 orig_x = delegitimize_mem_from_attrs (orig_x);
6951 /* Extract the symbol ref from:
6952 (plus:SI (reg:SI 12 %r12)
6953 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6954 UNSPEC_GOTOFF/PLTOFF)))
6956 (plus:SI (reg:SI 12 %r12)
6957 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6958 UNSPEC_GOTOFF/PLTOFF)
6959 (const_int 4 [0x4])))) */
6960 if (GET_CODE (x) == PLUS
6961 && REG_P (XEXP (x, 0))
6962 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6963 && GET_CODE (XEXP (x, 1)) == CONST)
6965 HOST_WIDE_INT offset = 0;
6967 /* The const operand. */
6968 y = XEXP (XEXP (x, 1), 0);
6970 if (GET_CODE (y) == PLUS
6971 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6973 offset = INTVAL (XEXP (y, 1));
6977 if (GET_CODE (y) == UNSPEC
6978 && (XINT (y, 1) == UNSPEC_GOTOFF
6979 || XINT (y, 1) == UNSPEC_PLTOFF))
6980 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6983 if (GET_CODE (x) != MEM)
6987 if (GET_CODE (x) == PLUS
6988 && GET_CODE (XEXP (x, 1)) == CONST
6989 && GET_CODE (XEXP (x, 0)) == REG
6990 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6992 y = XEXP (XEXP (x, 1), 0);
6993 if (GET_CODE (y) == UNSPEC
6994 && XINT (y, 1) == UNSPEC_GOT)
6995 y = XVECEXP (y, 0, 0);
6999 else if (GET_CODE (x) == CONST)
7001 /* Extract the symbol ref from:
7002 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7003 UNSPEC_PLT/GOTENT))) */
7006 if (GET_CODE (y) == UNSPEC
7007 && (XINT (y, 1) == UNSPEC_GOTENT
7008 || XINT (y, 1) == UNSPEC_PLT))
7009 y = XVECEXP (y, 0, 0);
7016 if (GET_MODE (orig_x) != Pmode)
7018 if (GET_MODE (orig_x) == BLKmode)
7020 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7027 /* Output operand OP to stdio stream FILE.
7028 OP is an address (register + offset) which is not used to address data;
7029 instead the rightmost bits are interpreted as the value. */
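/* Editorial example: for OP == (plus (reg %r3) (const_int 4100)) this prints
   "4(%r3)", since only the low twelve bits of the offset are significant for
   such operands.  */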
7032 print_addrstyle_operand (FILE *file, rtx op)
7034 HOST_WIDE_INT offset;
7037 /* Extract base register and offset. */
7038 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7044 gcc_assert (GET_CODE (base) == REG);
7045 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7046 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7049 /* Offsets are restricted to twelve bits. */
7050 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7052 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7055 /* Assigns the number of NOP halfwords to be emitted before and after the
7056 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7057 If hotpatching is disabled for the function, the values are set to zero. */
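/* Editorial example: a declaration such as
     void f (void) __attribute__ ((hotpatch (1, 2)));
   yields *HW_BEFORE == 1 and *HW_AFTER == 2.  */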
7061 s390_function_num_hotpatch_hw (tree decl,
7067 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7069 /* Handle the arguments of the hotpatch attribute. The values
7070 specified via the attribute override those given on the command line. */
7074 tree args = TREE_VALUE (attr);
7076 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7077 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7081 /* Use the values specified by the cmdline arguments. */
7082 *hw_before = s390_hotpatch_hw_before_label;
7083 *hw_after = s390_hotpatch_hw_after_label;
7087 /* Write the current .machine and .machinemode specification to the assembler file. */
7090 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7092 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7094 fprintf (asm_out_file, "\t.machinemode %s\n",
7095 (TARGET_ZARCH) ? "zarch" : "esa");
7096 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7097 if (S390_USE_ARCHITECTURE_MODIFIERS)
7101 cpu_flags = processor_flags_table[(int) s390_arch];
7102 if (TARGET_HTM && !(cpu_flags & PF_TX))
7103 fprintf (asm_out_file, "+htm");
7104 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7105 fprintf (asm_out_file, "+nohtm");
7106 if (TARGET_VX && !(cpu_flags & PF_VX))
7107 fprintf (asm_out_file, "+vx");
7108 else if (!TARGET_VX && (cpu_flags & PF_VX))
7109 fprintf (asm_out_file, "+novx");
7111 fprintf (asm_out_file, "\"\n");
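/* A typical sequence emitted here (editorial example):
     .machinemode zarch
     .machine "z13"
   with "+htm"/"+nohtm" and "+vx"/"+novx" appended inside the quotes whenever
   the selected facilities differ from those of the base architecture.  */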
7114 /* Write an extra function header before the very start of the function. */
7117 s390_asm_output_function_prefix (FILE *asm_out_file,
7118 const char *fnname ATTRIBUTE_UNUSED)
7120 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7122 /* Since only the function-specific options are saved, and not which options
7123 were set to begin with, it's too much work here to figure out which options
7124 have actually changed. Thus, generate .machine and .machinemode whenever a
7125 function has the target attribute or pragma. */
7126 fprintf (asm_out_file, "\t.machinemode push\n");
7127 fprintf (asm_out_file, "\t.machine push\n");
7128 s390_asm_output_machine_for_arch (asm_out_file);
7131 /* Write an extra function footer after the very end of the function. */
7134 s390_asm_declare_function_size (FILE *asm_out_file,
7135 const char *fnname, tree decl)
7137 if (!flag_inhibit_size_directive)
7138 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7139 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7141 fprintf (asm_out_file, "\t.machine pop\n");
7142 fprintf (asm_out_file, "\t.machinemode pop\n");
7146 /* Write the extra assembler code needed to declare a function properly. */
7149 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7152 int hw_before, hw_after;
7154 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7157 unsigned int function_alignment;
7160 /* Add a trampoline code area before the function label and initialize it
7161 with two-byte nop instructions. This area can be overwritten with code
7162 that jumps to a patched version of the function. */
7163 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
7164 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7166 for (i = 1; i < hw_before; i++)
7167 fputs ("\tnopr\t%r7\n", asm_out_file);
7169 /* Note: The function label must be aligned so that (a) the bytes of the
7170 following nop do not cross a cacheline boundary, and (b) a jump address
7171 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7172 stored directly before the label without crossing a cacheline
7173 boundary. All this is necessary to make sure the trampoline code can
7174 be changed atomically.
7175 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7176 if there are NOPs before the function label, the alignment is placed
7177 before them. So it is necessary to duplicate the alignment after the NOPs. */
7179 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7180 if (! DECL_USER_ALIGN (decl))
7181 function_alignment = MAX (function_alignment,
7182 (unsigned int) align_functions);
7183 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7184 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7187 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7189 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7190 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7191 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7192 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7193 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7194 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7195 s390_warn_framesize);
7196 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7197 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7198 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7199 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7200 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7201 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7202 TARGET_PACKED_STACK);
7203 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7204 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7205 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7206 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7207 s390_warn_dynamicstack_p);
7209 ASM_OUTPUT_LABEL (asm_out_file, fname);
7211 asm_fprintf (asm_out_file,
7212 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7216 /* Output machine-dependent UNSPECs occurring in address constant X
7217 in assembler syntax to stdio stream FILE. Returns true if the
7218 constant X could be recognized, false otherwise. */
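/* Editorial example: for X == (unspec [(symbol_ref "foo")] UNSPEC_GOTENT)
   this prints "foo@GOTENT" and returns true.  */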
7221 s390_output_addr_const_extra (FILE *file, rtx x)
7223 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7224 switch (XINT (x, 1))
7227 output_addr_const (file, XVECEXP (x, 0, 0));
7228 fprintf (file, "@GOTENT");
7231 output_addr_const (file, XVECEXP (x, 0, 0));
7232 fprintf (file, "@GOT");
7235 output_addr_const (file, XVECEXP (x, 0, 0));
7236 fprintf (file, "@GOTOFF");
7239 output_addr_const (file, XVECEXP (x, 0, 0));
7240 fprintf (file, "@PLT");
7243 output_addr_const (file, XVECEXP (x, 0, 0));
7244 fprintf (file, "@PLTOFF");
7247 output_addr_const (file, XVECEXP (x, 0, 0));
7248 fprintf (file, "@TLSGD");
7251 assemble_name (file, get_some_local_dynamic_name ());
7252 fprintf (file, "@TLSLDM");
7255 output_addr_const (file, XVECEXP (x, 0, 0));
7256 fprintf (file, "@DTPOFF");
7259 output_addr_const (file, XVECEXP (x, 0, 0));
7260 fprintf (file, "@NTPOFF");
7262 case UNSPEC_GOTNTPOFF:
7263 output_addr_const (file, XVECEXP (x, 0, 0));
7264 fprintf (file, "@GOTNTPOFF");
7266 case UNSPEC_INDNTPOFF:
7267 output_addr_const (file, XVECEXP (x, 0, 0));
7268 fprintf (file, "@INDNTPOFF");
7272 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7273 switch (XINT (x, 1))
7275 case UNSPEC_POOL_OFFSET:
7276 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7277 output_addr_const (file, x);
7283 /* Output address operand ADDR in assembler syntax to
7284 stdio stream FILE. */
7287 print_operand_address (FILE *file, rtx addr)
7289 struct s390_address ad;
7291 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7295 output_operand_lossage ("symbolic memory references are "
7296 "only supported on z10 or later");
7299 output_addr_const (file, addr);
7303 if (!s390_decompose_address (addr, &ad)
7304 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7305 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7306 output_operand_lossage ("cannot decompose address");
7309 output_addr_const (file, ad.disp);
7311 fprintf (file, "0");
7313 if (ad.base && ad.indx)
7314 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7315 reg_names[REGNO (ad.base)]);
7317 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7320 /* Output operand X in assembler syntax to stdio stream FILE.
7321 CODE specifies the format flag. The following format flags
7324 'C': print opcode suffix for branch condition.
7325 'D': print opcode suffix for inverse branch condition.
7326 'E': print opcode suffix for branch on index instruction.
7327 'G': print the size of the operand in bytes.
7328 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7329 'M': print the second word of a TImode operand.
7330 'N': print the second word of a DImode operand.
7331 'O': print only the displacement of a memory reference or address.
7332 'R': print only the base register of a memory reference or address.
7333 'S': print S-type memory reference (base+displacement).
7334 'Y': print address style operand without index (e.g. shift count or setmem operand).
7337 'b': print integer X as if it's an unsigned byte.
7338 'c': print integer X as if it's a signed byte.
7339 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7340 'f': "end" contiguous bitmask X in SImode.
7341 'h': print integer X as if it's a signed halfword.
7342 'i': print the first nonzero HImode part of X.
7343 'j': print the first HImode part unequal to -1 of X.
7344 'k': print the first nonzero SImode part of X.
7345 'm': print the first SImode part unequal to -1 of X.
7346 'o': print integer X as if it's an unsigned 32-bit word.
7347 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7348 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7349 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7350 'x': print integer X as if it's an unsigned halfword.
7351 'v': print register number as vector register (v1 instead of f1).
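/* Editorial note: these letters are used in machine-description templates,
   e.g. "%C1" to emit a condition suffix or "%N0" to address the second word
   of a DImode operand.  */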
7355 print_operand (FILE *file, rtx x, int code)
7362 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7366 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7370 if (GET_CODE (x) == LE)
7371 fprintf (file, "l");
7372 else if (GET_CODE (x) == GT)
7373 fprintf (file, "h");
7375 output_operand_lossage ("invalid comparison operator "
7376 "for 'E' output modifier");
7380 if (GET_CODE (x) == SYMBOL_REF)
7382 fprintf (file, "%s", ":tls_load:");
7383 output_addr_const (file, x);
7385 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7387 fprintf (file, "%s", ":tls_gdcall:");
7388 output_addr_const (file, XVECEXP (x, 0, 0));
7390 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7392 fprintf (file, "%s", ":tls_ldcall:");
7393 const char *name = get_some_local_dynamic_name ();
7395 assemble_name (file, name);
7398 output_operand_lossage ("invalid reference for 'J' output modifier");
7402 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7407 struct s390_address ad;
7410 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7413 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7416 output_operand_lossage ("invalid address for 'O' output modifier");
7421 output_addr_const (file, ad.disp);
7423 fprintf (file, "0");
7429 struct s390_address ad;
7432 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7435 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7438 output_operand_lossage ("invalid address for 'R' output modifier");
7443 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7445 fprintf (file, "0");
7451 struct s390_address ad;
7456 output_operand_lossage ("memory reference expected for "
7457 "'S' output modifier");
7460 ret = s390_decompose_address (XEXP (x, 0), &ad);
7463 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7466 output_operand_lossage ("invalid address for 'S' output modifier");
7471 output_addr_const (file, ad.disp);
7473 fprintf (file, "0");
7476 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7481 if (GET_CODE (x) == REG)
7482 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7483 else if (GET_CODE (x) == MEM)
7484 x = change_address (x, VOIDmode,
7485 plus_constant (Pmode, XEXP (x, 0), 4));
7487 output_operand_lossage ("register or memory expression expected "
7488 "for 'N' output modifier");
7492 if (GET_CODE (x) == REG)
7493 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7494 else if (GET_CODE (x) == MEM)
7495 x = change_address (x, VOIDmode,
7496 plus_constant (Pmode, XEXP (x, 0), 8));
7498 output_operand_lossage ("register or memory expression expected "
7499 "for 'M' output modifier");
7503 print_addrstyle_operand (file, x);
7507 switch (GET_CODE (x))
7510 /* Print FP regs as fx instead of vx when they are accessed
7511 through non-vector mode. */
7513 || VECTOR_NOFP_REG_P (x)
7514 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7515 || (VECTOR_REG_P (x)
7516 && (GET_MODE_SIZE (GET_MODE (x)) /
7517 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7518 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7520 fprintf (file, "%s", reg_names[REGNO (x)]);
7524 output_address (GET_MODE (x), XEXP (x, 0));
7531 output_addr_const (file, x);
7544 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7550 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7553 ival = s390_extract_part (x, HImode, 0);
7556 ival = s390_extract_part (x, HImode, -1);
7559 ival = s390_extract_part (x, SImode, 0);
7562 ival = s390_extract_part (x, SImode, -1);
7574 len = (code == 's' || code == 'e' ? 64 : 32);
7575 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7577 if (code == 's' || code == 't')
7584 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7589 case CONST_WIDE_INT:
7591 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7592 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7593 else if (code == 'x')
7594 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7595 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7596 else if (code == 'h')
7597 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7598 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7602 output_operand_lossage ("invalid constant - try using "
7603 "an output modifier");
7605 output_operand_lossage ("invalid constant for output modifier '%c'",
7613 gcc_assert (const_vec_duplicate_p (x));
7614 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7615 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7623 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7625 ival = (code == 's') ? start : end;
7626 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7632 bool ok = s390_bytemask_vector_p (x, &mask);
7634 fprintf (file, "%u", mask);
7639 output_operand_lossage ("invalid constant vector for output "
7640 "modifier '%c'", code);
7646 output_operand_lossage ("invalid expression - try using "
7647 "an output modifier");
7649 output_operand_lossage ("invalid expression for output "
7650 "modifier '%c'", code);
7655 /* Target hook for assembling integer objects. We need to define it
7656 here to work around a bug in some versions of GAS, which couldn't
7657 handle values smaller than INT_MIN when printed in decimal. */
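/* Editorial example: -9223372036854775808 (smaller than the 32-bit INT_MIN)
   is emitted as
     .quad 0x8000000000000000
   rather than in decimal.  */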
7660 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7662 if (size == 8 && aligned_p
7663 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7665 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7669 return default_assemble_integer (x, size, aligned_p);
7672 /* Returns true if register REGNO is used for forming
7673 a memory address in expression X. */
7676 reg_used_in_mem_p (int regno, rtx x)
7678 enum rtx_code code = GET_CODE (x);
7684 if (refers_to_regno_p (regno, XEXP (x, 0)))
7687 else if (code == SET
7688 && GET_CODE (SET_DEST (x)) == PC)
7690 if (refers_to_regno_p (regno, SET_SRC (x)))
7694 fmt = GET_RTX_FORMAT (code);
7695 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7698 && reg_used_in_mem_p (regno, XEXP (x, i)))
7701 else if (fmt[i] == 'E')
7702 for (j = 0; j < XVECLEN (x, i); j++)
7703 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7709 /* Returns true if expression DEP_RTX sets an address register
7710 used by instruction INSN to address memory. */
7713 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7717 if (NONJUMP_INSN_P (dep_rtx))
7718 dep_rtx = PATTERN (dep_rtx);
7720 if (GET_CODE (dep_rtx) == SET)
7722 target = SET_DEST (dep_rtx);
7723 if (GET_CODE (target) == STRICT_LOW_PART)
7724 target = XEXP (target, 0);
7725 while (GET_CODE (target) == SUBREG)
7726 target = SUBREG_REG (target);
7728 if (GET_CODE (target) == REG)
7730 int regno = REGNO (target);
7732 if (s390_safe_attr_type (insn) == TYPE_LA)
7734 pat = PATTERN (insn);
7735 if (GET_CODE (pat) == PARALLEL)
7737 gcc_assert (XVECLEN (pat, 0) == 2);
7738 pat = XVECEXP (pat, 0, 0);
7740 gcc_assert (GET_CODE (pat) == SET);
7741 return refers_to_regno_p (regno, SET_SRC (pat));
7743 else if (get_attr_atype (insn) == ATYPE_AGEN)
7744 return reg_used_in_mem_p (regno, PATTERN (insn));
7750 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
7753 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7755 rtx dep_rtx = PATTERN (dep_insn);
7758 if (GET_CODE (dep_rtx) == SET
7759 && addr_generation_dependency_p (dep_rtx, insn))
7761 else if (GET_CODE (dep_rtx) == PARALLEL)
7763 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7765 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7773 /* A C statement (sans semicolon) to update the integer scheduling priority
7774 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7775 reduce the priority to execute INSN later. Do not define this macro if
7776 you do not need to adjust the scheduling priorities of insns.
7778 A STD instruction should be scheduled earlier,
7779 in order to use the bypass. */
7781 s390_adjust_priority (rtx_insn *insn, int priority)
7783 if (! INSN_P (insn))
7786 if (s390_tune <= PROCESSOR_2064_Z900)
7789 switch (s390_safe_attr_type (insn))
7793 priority = priority << 3;
7797 priority = priority << 1;
7806 /* The number of instructions that can be issued per cycle. */
7809 s390_issue_rate (void)
7813 case PROCESSOR_2084_Z990:
7814 case PROCESSOR_2094_Z9_109:
7815 case PROCESSOR_2094_Z9_EC:
7816 case PROCESSOR_2817_Z196:
7818 case PROCESSOR_2097_Z10:
7820 case PROCESSOR_9672_G5:
7821 case PROCESSOR_9672_G6:
7822 case PROCESSOR_2064_Z900:
7823 /* Starting with EC12 we use the sched_reorder hook to take care
7824 of instruction dispatch constraints. The algorithm only
7825 picks the best instruction and assumes only a single
7826 instruction gets issued per cycle. */
7827 case PROCESSOR_2827_ZEC12:
7828 case PROCESSOR_2964_Z13:
7835 s390_first_cycle_multipass_dfa_lookahead (void)
7840 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7841 Fix up MEMs as required. */
7844 annotate_constant_pool_refs (rtx *x)
7849 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7850 || !CONSTANT_POOL_ADDRESS_P (*x));
7852 /* Literal pool references can only occur inside a MEM ... */
7853 if (GET_CODE (*x) == MEM)
7855 rtx memref = XEXP (*x, 0);
7857 if (GET_CODE (memref) == SYMBOL_REF
7858 && CONSTANT_POOL_ADDRESS_P (memref))
7860 rtx base = cfun->machine->base_reg;
7861 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7864 *x = replace_equiv_address (*x, addr);
7868 if (GET_CODE (memref) == CONST
7869 && GET_CODE (XEXP (memref, 0)) == PLUS
7870 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7871 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7872 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7874 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7875 rtx sym = XEXP (XEXP (memref, 0), 0);
7876 rtx base = cfun->machine->base_reg;
7877 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7880 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7885 /* ... or a load-address type pattern. */
7886 if (GET_CODE (*x) == SET)
7888 rtx addrref = SET_SRC (*x);
7890 if (GET_CODE (addrref) == SYMBOL_REF
7891 && CONSTANT_POOL_ADDRESS_P (addrref))
7893 rtx base = cfun->machine->base_reg;
7894 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7897 SET_SRC (*x) = addr;
7901 if (GET_CODE (addrref) == CONST
7902 && GET_CODE (XEXP (addrref, 0)) == PLUS
7903 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7904 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7905 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7907 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7908 rtx sym = XEXP (XEXP (addrref, 0), 0);
7909 rtx base = cfun->machine->base_reg;
7910 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7913 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7918 /* Annotate LTREL_BASE as well. */
7919 if (GET_CODE (*x) == UNSPEC
7920 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7922 rtx base = cfun->machine->base_reg;
7923 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7928 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7929 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7933 annotate_constant_pool_refs (&XEXP (*x, i));
7935 else if (fmt[i] == 'E')
7937 for (j = 0; j < XVECLEN (*x, i); j++)
7938 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7943 /* Split all branches that exceed the maximum distance.
7944 Returns true if this created a new literal pool entry. */
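/* Editorial sketch: an out-of-range branch is turned into an indirect branch
   through the return register; the target address (or, on pre-z CPUs, its
   literal-pool-relative offset) is first loaded from the literal pool, which
   is why save_return_addr_p is forced below.  */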
7947 s390_split_branches (void)
7949 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7950 int new_literal = 0, ret;
7955 /* We need correct insn addresses. */
7957 shorten_branches (get_insns ());
7959 /* Find all branches that exceed 64KB, and split them. */
7961 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7963 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7966 pat = PATTERN (insn);
7967 if (GET_CODE (pat) == PARALLEL)
7968 pat = XVECEXP (pat, 0, 0);
7969 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7972 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7974 label = &SET_SRC (pat);
7976 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7978 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7979 label = &XEXP (SET_SRC (pat), 1);
7980 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7981 label = &XEXP (SET_SRC (pat), 2);
7988 if (get_attr_length (insn) <= 4)
7991 /* We are going to use the return register as a scratch register;
7992 make sure it will be saved/restored by the prologue/epilogue. */
7993 cfun_frame_layout.save_return_addr_p = 1;
7998 rtx mem = force_const_mem (Pmode, *label);
7999 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8001 INSN_ADDRESSES_NEW (set_insn, -1);
8002 annotate_constant_pool_refs (&PATTERN (set_insn));
8009 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8010 UNSPEC_LTREL_OFFSET);
8011 target = gen_rtx_CONST (Pmode, target);
8012 target = force_const_mem (Pmode, target);
8013 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8015 INSN_ADDRESSES_NEW (set_insn, -1);
8016 annotate_constant_pool_refs (&PATTERN (set_insn));
8018 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8019 cfun->machine->base_reg),
8021 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8024 ret = validate_change (insn, label, target, 0);
8032 /* Find an annotated literal pool symbol referenced in RTX X,
8033 and store it at REF. Will abort if X contains references to
8034 more than one such pool symbol; multiple references to the same
8035 symbol are allowed, however.
8037 The rtx pointed to by REF must be initialized to NULL_RTX
8038 by the caller before calling this routine. */
8041 find_constant_pool_ref (rtx x, rtx *ref)
8046 /* Ignore LTREL_BASE references. */
8047 if (GET_CODE (x) == UNSPEC
8048 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8050 /* Likewise POOL_ENTRY insns. */
8051 if (GET_CODE (x) == UNSPEC_VOLATILE
8052 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8055 gcc_assert (GET_CODE (x) != SYMBOL_REF
8056 || !CONSTANT_POOL_ADDRESS_P (x));
8058 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8060 rtx sym = XVECEXP (x, 0, 0);
8061 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8062 && CONSTANT_POOL_ADDRESS_P (sym));
8064 if (*ref == NULL_RTX)
8067 gcc_assert (*ref == sym);
8072 fmt = GET_RTX_FORMAT (GET_CODE (x));
8073 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8077 find_constant_pool_ref (XEXP (x, i), ref);
8079 else if (fmt[i] == 'E')
8081 for (j = 0; j < XVECLEN (x, i); j++)
8082 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8087 /* Replace every reference to the annotated literal pool
8088 symbol REF in X by its base plus OFFSET. */
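/* Editorial example: a reference of the form
     (unspec [(symbol_ref "*.LC0") (reg %r13)] UNSPEC_LTREF)
   is rewritten here as
     (plus (reg %r13) OFFSET).  */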
8091 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8096 gcc_assert (*x != ref);
8098 if (GET_CODE (*x) == UNSPEC
8099 && XINT (*x, 1) == UNSPEC_LTREF
8100 && XVECEXP (*x, 0, 0) == ref)
8102 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8106 if (GET_CODE (*x) == PLUS
8107 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8108 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8109 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8110 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8112 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8113 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8117 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8118 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8122 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8124 else if (fmt[i] == 'E')
8126 for (j = 0; j < XVECLEN (*x, i); j++)
8127 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8132 /* Check whether X contains an UNSPEC_LTREL_BASE.
8133 Return its constant pool symbol if found, NULL_RTX otherwise. */
8136 find_ltrel_base (rtx x)
8141 if (GET_CODE (x) == UNSPEC
8142 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8143 return XVECEXP (x, 0, 0);
8145 fmt = GET_RTX_FORMAT (GET_CODE (x));
8146 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8150 rtx fnd = find_ltrel_base (XEXP (x, i));
8154 else if (fmt[i] == 'E')
8156 for (j = 0; j < XVECLEN (x, i); j++)
8158 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8168 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8171 replace_ltrel_base (rtx *x)
8176 if (GET_CODE (*x) == UNSPEC
8177 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8179 *x = XVECEXP (*x, 0, 1);
8183 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8184 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8188 replace_ltrel_base (&XEXP (*x, i));
8190 else if (fmt[i] == 'E')
8192 for (j = 0; j < XVECLEN (*x, i); j++)
8193 replace_ltrel_base (&XVECEXP (*x, i, j));
8199 /* We keep a list of constants which we have to add to internal
8200 constant tables in the middle of large functions. */
8202 #define NR_C_MODES 32
8203 machine_mode constant_modes[NR_C_MODES] =
8205 TFmode, TImode, TDmode,
8206 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8207 V4SFmode, V2DFmode, V1TFmode,
8208 DFmode, DImode, DDmode,
8209 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8210 SFmode, SImode, SDmode,
8211 V4QImode, V2HImode, V1SImode, V1SFmode,
8220 struct constant *next;
8222 rtx_code_label *label;
8225 struct constant_pool
8227 struct constant_pool *next;
8228 rtx_insn *first_insn;
8229 rtx_insn *pool_insn;
8231 rtx_insn *emit_pool_after;
8233 struct constant *constants[NR_C_MODES];
8234 struct constant *execute;
8235 rtx_code_label *label;
8239 /* Allocate new constant_pool structure. */
8241 static struct constant_pool *
8242 s390_alloc_pool (void)
8244 struct constant_pool *pool;
8247 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8249 for (i = 0; i < NR_C_MODES; i++)
8250 pool->constants[i] = NULL;
8252 pool->execute = NULL;
8253 pool->label = gen_label_rtx ();
8254 pool->first_insn = NULL;
8255 pool->pool_insn = NULL;
8256 pool->insns = BITMAP_ALLOC (NULL);
8258 pool->emit_pool_after = NULL;
8263 /* Create new constant pool covering instructions starting at INSN
8264 and chain it to the end of POOL_LIST. */
8266 static struct constant_pool *
8267 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8269 struct constant_pool *pool, **prev;
8271 pool = s390_alloc_pool ();
8272 pool->first_insn = insn;
8274 for (prev = pool_list; *prev; prev = &(*prev)->next)
8281 /* End range of instructions covered by POOL at INSN and emit
8282 placeholder insn representing the pool. */
8285 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8287 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8290 insn = get_last_insn ();
8292 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8293 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8296 /* Add INSN to the list of insns covered by POOL. */
8299 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8301 bitmap_set_bit (pool->insns, INSN_UID (insn));
8304 /* Return pool out of POOL_LIST that covers INSN. */
8306 static struct constant_pool *
8307 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8309 struct constant_pool *pool;
8311 for (pool = pool_list; pool; pool = pool->next)
8312 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8318 /* Add constant VAL of mode MODE to the constant pool POOL. */
8321 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8326 for (i = 0; i < NR_C_MODES; i++)
8327 if (constant_modes[i] == mode)
8329 gcc_assert (i != NR_C_MODES);
8331 for (c = pool->constants[i]; c != NULL; c = c->next)
8332 if (rtx_equal_p (val, c->value))
8337 c = (struct constant *) xmalloc (sizeof *c);
8339 c->label = gen_label_rtx ();
8340 c->next = pool->constants[i];
8341 pool->constants[i] = c;
8342 pool->size += GET_MODE_SIZE (mode);
8346 /* Return an rtx that represents the offset of X from the start of pool POOL. */
8350 s390_pool_offset (struct constant_pool *pool, rtx x)
8354 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8355 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8356 UNSPEC_POOL_OFFSET);
8357 return gen_rtx_CONST (GET_MODE (x), x);
8360 /* Find constant VAL of mode MODE in the constant pool POOL.
8361 Return an RTX describing the distance from the start of
8362 the pool to the location of the new constant. */
8365 s390_find_constant (struct constant_pool *pool, rtx val,
8371 for (i = 0; i < NR_C_MODES; i++)
8372 if (constant_modes[i] == mode)
8374 gcc_assert (i != NR_C_MODES);
8376 for (c = pool->constants[i]; c != NULL; c = c->next)
8377 if (rtx_equal_p (val, c->value))
8382 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8385 /* Check whether INSN is an execute. Return the label_ref to its
8386 execute target template if so, NULL_RTX otherwise. */
8389 s390_execute_label (rtx insn)
8391 if (NONJUMP_INSN_P (insn)
8392 && GET_CODE (PATTERN (insn)) == PARALLEL
8393 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8394 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8395 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8400 /* Add execute target for INSN to the constant pool POOL. */
8403 s390_add_execute (struct constant_pool *pool, rtx insn)
8407 for (c = pool->execute; c != NULL; c = c->next)
8408 if (INSN_UID (insn) == INSN_UID (c->value))
8413 c = (struct constant *) xmalloc (sizeof *c);
8415 c->label = gen_label_rtx ();
8416 c->next = pool->execute;
8422 /* Find execute target for INSN in the constant pool POOL.
8423 Return an RTX describing the distance from the start of
8424 the pool to the location of the execute target. */
8427 s390_find_execute (struct constant_pool *pool, rtx insn)
8431 for (c = pool->execute; c != NULL; c = c->next)
8432 if (INSN_UID (insn) == INSN_UID (c->value))
8437 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8440 /* For an execute INSN, extract the execute target template. */
8443 s390_execute_target (rtx insn)
8445 rtx pattern = PATTERN (insn);
8446 gcc_assert (s390_execute_label (insn));
8448 if (XVECLEN (pattern, 0) == 2)
8450 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8454 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8457 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8458 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8460 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8466 /* Indicate that INSN cannot be duplicated. This is the case for
8467 execute insns that carry a unique label. */
8470 s390_cannot_copy_insn_p (rtx_insn *insn)
8472 rtx label = s390_execute_label (insn);
8473 return label && label != const0_rtx;
8476 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8477 do not emit the pool base label. */
8480 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8483 rtx_insn *insn = pool->pool_insn;
8486 /* Switch to rodata section. */
8487 if (TARGET_CPU_ZARCH)
8489 insn = emit_insn_after (gen_pool_section_start (), insn);
8490 INSN_ADDRESSES_NEW (insn, -1);
8493 /* Ensure minimum pool alignment. */
8494 if (TARGET_CPU_ZARCH)
8495 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8497 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8498 INSN_ADDRESSES_NEW (insn, -1);
8500 /* Emit pool base label. */
8503 insn = emit_label_after (pool->label, insn);
8504 INSN_ADDRESSES_NEW (insn, -1);
8507 /* Dump constants in descending alignment requirement order,
8508 ensuring proper alignment for every constant. */
8509 for (i = 0; i < NR_C_MODES; i++)
8510 for (c = pool->constants[i]; c; c = c->next)
8512 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8513 rtx value = copy_rtx (c->value);
8514 if (GET_CODE (value) == CONST
8515 && GET_CODE (XEXP (value, 0)) == UNSPEC
8516 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8517 && XVECLEN (XEXP (value, 0), 0) == 1)
8518 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8520 insn = emit_label_after (c->label, insn);
8521 INSN_ADDRESSES_NEW (insn, -1);
8523 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8524 gen_rtvec (1, value),
8525 UNSPECV_POOL_ENTRY);
8526 insn = emit_insn_after (value, insn);
8527 INSN_ADDRESSES_NEW (insn, -1);
8530 /* Ensure minimum alignment for instructions. */
8531 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8532 INSN_ADDRESSES_NEW (insn, -1);
8534 /* Output in-pool execute template insns. */
8535 for (c = pool->execute; c; c = c->next)
8537 insn = emit_label_after (c->label, insn);
8538 INSN_ADDRESSES_NEW (insn, -1);
8540 insn = emit_insn_after (s390_execute_target (c->value), insn);
8541 INSN_ADDRESSES_NEW (insn, -1);
8544 /* Switch back to previous section. */
8545 if (TARGET_CPU_ZARCH)
8547 insn = emit_insn_after (gen_pool_section_end (), insn);
8548 INSN_ADDRESSES_NEW (insn, -1);
8551 insn = emit_barrier_after (insn);
8552 INSN_ADDRESSES_NEW (insn, -1);
8554 /* Remove placeholder insn. */
8555 remove_insn (pool->pool_insn);
8558 /* Free all memory used by POOL. */
8561 s390_free_pool (struct constant_pool *pool)
8563 struct constant *c, *next;
8566 for (i = 0; i < NR_C_MODES; i++)
8567 for (c = pool->constants[i]; c; c = next)
8573 for (c = pool->execute; c; c = next)
8579 BITMAP_FREE (pool->insns);
8584 /* Collect main literal pool. Return NULL on overflow. */
8586 static struct constant_pool *
8587 s390_mainpool_start (void)
8589 struct constant_pool *pool;
8592 pool = s390_alloc_pool ();
8594 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8596 if (NONJUMP_INSN_P (insn)
8597 && GET_CODE (PATTERN (insn)) == SET
8598 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8599 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8601 /* There might be two main_pool instructions if base_reg
8602 is call-clobbered; one for shrink-wrapped code and one
8603 for the rest. We want to keep the first. */
8604 if (pool->pool_insn)
8606 insn = PREV_INSN (insn);
8607 delete_insn (NEXT_INSN (insn));
8610 pool->pool_insn = insn;
8613 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8615 s390_add_execute (pool, insn);
8617 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8619 rtx pool_ref = NULL_RTX;
8620 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8623 rtx constant = get_pool_constant (pool_ref);
8624 machine_mode mode = get_pool_mode (pool_ref);
8625 s390_add_constant (pool, constant, mode);
8629 /* If hot/cold partitioning is enabled we have to make sure that
8630 the literal pool is emitted in the same section where the
8631 initialization of the literal pool base pointer takes place.
8632 emit_pool_after is only used in the non-overflow case on
8633 non-z CPUs, where we can emit the literal pool at the end of the
8634 function body within the text section. */
8636 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8637 && !pool->emit_pool_after)
8638 pool->emit_pool_after = PREV_INSN (insn);
8641 gcc_assert (pool->pool_insn || pool->size == 0);
8643 if (pool->size >= 4096)
8645 /* We're going to chunkify the pool, so remove the main
8646 pool placeholder insn. */
8647 remove_insn (pool->pool_insn);
8649 s390_free_pool (pool);
8653 /* If the function ends with the section where the literal pool
8654 should be emitted, set the marker to its end. */
8655 if (pool && !pool->emit_pool_after)
8656 pool->emit_pool_after = get_last_insn ();
8661 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8662 Modify the current function to output the pool constants as well as
8663 the pool register setup instruction. */
8666 s390_mainpool_finish (struct constant_pool *pool)
8668 rtx base_reg = cfun->machine->base_reg;
8670 /* If the pool is empty, we're done. */
8671 if (pool->size == 0)
8673 /* We don't actually need a base register after all. */
8674 cfun->machine->base_reg = NULL_RTX;
8676 if (pool->pool_insn)
8677 remove_insn (pool->pool_insn);
8678 s390_free_pool (pool);
8682 /* We need correct insn addresses. */
8683 shorten_branches (get_insns ());
8685 /* On zSeries, we use a LARL to load the pool register. The pool is
8686 located in the .rodata section, so we emit it after the function. */
8687 if (TARGET_CPU_ZARCH)
8689 rtx set = gen_main_base_64 (base_reg, pool->label);
8690 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8691 INSN_ADDRESSES_NEW (insn, -1);
8692 remove_insn (pool->pool_insn);
8694 insn = get_last_insn ();
8695 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8696 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8698 s390_dump_pool (pool, 0);
8701 /* On S/390, if the total size of the function's code plus literal pool
8702 does not exceed 4096 bytes, we use BASR to set up a function base
8703 pointer, and emit the literal pool at the end of the function. */
8704 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8705 + pool->size + 8 /* alignment slop */ < 4096)
8707 rtx set = gen_main_base_31_small (base_reg, pool->label);
8708 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8709 INSN_ADDRESSES_NEW (insn, -1);
8710 remove_insn (pool->pool_insn);
8712 insn = emit_label_after (pool->label, insn);
8713 INSN_ADDRESSES_NEW (insn, -1);
8715 /* emit_pool_after will be set by s390_mainpool_start to the
8716 last insn of the section where the literal pool should be emitted. */
8718 insn = pool->emit_pool_after;
8720 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8721 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8723 s390_dump_pool (pool, 1);
8726 /* Otherwise, we emit an inline literal pool and use BASR to branch
8727 over it, setting up the pool register at the same time. */
8730 rtx_code_label *pool_end = gen_label_rtx ();
8732 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8733 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8734 JUMP_LABEL (insn) = pool_end;
8735 INSN_ADDRESSES_NEW (insn, -1);
8736 remove_insn (pool->pool_insn);
8738 insn = emit_label_after (pool->label, insn);
8739 INSN_ADDRESSES_NEW (insn, -1);
8741 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8742 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8744 insn = emit_label_after (pool_end, pool->pool_insn);
8745 INSN_ADDRESSES_NEW (insn, -1);
8747 s390_dump_pool (pool, 1);
8751 /* Replace all literal pool references. */
8753 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8756 replace_ltrel_base (&PATTERN (insn));
8758 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8760 rtx addr, pool_ref = NULL_RTX;
8761 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8764 if (s390_execute_label (insn))
8765 addr = s390_find_execute (pool, insn);
8767 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8768 get_pool_mode (pool_ref));
8770 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8771 INSN_CODE (insn) = -1;
8777 /* Free the pool. */
8778 s390_free_pool (pool);
8781 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8782 We have decided we cannot use this pool, so revert all changes
8783 to the current function that were done by s390_mainpool_start. */
8785 s390_mainpool_cancel (struct constant_pool *pool)
8787 /* We didn't actually change the instruction stream, so simply
8788 free the pool memory. */
8789 s390_free_pool (pool);
8793 /* Chunkify the literal pool. */
8795 #define S390_POOL_CHUNK_MIN 0xc00
8796 #define S390_POOL_CHUNK_MAX 0xe00
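/* Editorial note: both limits are kept well below the 4 KB displacement
   range so that every constant of a chunk stays addressable from the chunk's
   base register even after the base-register reload insns accounted for
   below are inserted.  */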
8798 static struct constant_pool *
8799 s390_chunkify_start (void)
8801 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8804 rtx pending_ltrel = NULL_RTX;
8807 rtx (*gen_reload_base) (rtx, rtx) =
8808 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8811 /* We need correct insn addresses. */
8813 shorten_branches (get_insns ());
8815 /* Scan all insns and move literals to pool chunks. */
8817 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8819 bool section_switch_p = false;
8821 /* Check for pending LTREL_BASE. */
8824 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8827 gcc_assert (ltrel_base == pending_ltrel);
8828 pending_ltrel = NULL_RTX;
8832 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8835 curr_pool = s390_start_pool (&pool_list, insn);
8837 s390_add_execute (curr_pool, insn);
8838 s390_add_pool_insn (curr_pool, insn);
8840 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8842 rtx pool_ref = NULL_RTX;
8843 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8846 rtx constant = get_pool_constant (pool_ref);
8847 machine_mode mode = get_pool_mode (pool_ref);
8850 curr_pool = s390_start_pool (&pool_list, insn);
8852 s390_add_constant (curr_pool, constant, mode);
8853 s390_add_pool_insn (curr_pool, insn);
8855 /* Don't split the pool chunk between a LTREL_OFFSET load
8856 and the corresponding LTREL_BASE. */
8857 if (GET_CODE (constant) == CONST
8858 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8859 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8861 gcc_assert (!pending_ltrel);
8862 pending_ltrel = pool_ref;
8867 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8870 s390_add_pool_insn (curr_pool, insn);
8871 /* An LTREL_BASE must follow within the same basic block. */
8872 gcc_assert (!pending_ltrel);
8876 switch (NOTE_KIND (insn))
8878 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8879 section_switch_p = true;
8881 case NOTE_INSN_VAR_LOCATION:
8882 case NOTE_INSN_CALL_ARG_LOCATION:
8889 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8890 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8893 if (TARGET_CPU_ZARCH)
8895 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8898 s390_end_pool (curr_pool, NULL);
8903 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8904 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8907 /* We will later have to insert base register reload insns.
8908 Those will have an effect on code size, which we need to
8909 consider here. This calculation makes rather pessimistic
8910 worst-case assumptions. */
8914 if (chunk_size < S390_POOL_CHUNK_MIN
8915 && curr_pool->size < S390_POOL_CHUNK_MIN
8916 && !section_switch_p)
8919 /* Pool chunks can only be inserted after BARRIERs ... */
8920 if (BARRIER_P (insn))
8922 s390_end_pool (curr_pool, insn);
8927 /* ... so if we don't find one in time, create one. */
8928 else if (chunk_size > S390_POOL_CHUNK_MAX
8929 || curr_pool->size > S390_POOL_CHUNK_MAX
8930 || section_switch_p)
8932 rtx_insn *label, *jump, *barrier, *next, *prev;
8934 if (!section_switch_p)
8936 /* We can insert the barrier only after a 'real' insn. */
8937 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8939 if (get_attr_length (insn) == 0)
8941 /* Don't separate LTREL_BASE from the corresponding
8942 LTREL_OFFSET load. */
8949 next = NEXT_INSN (insn);
8953 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8954 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8958 gcc_assert (!pending_ltrel);
8960 /* The old pool has to end before the section switch
8961 note in order to make it part of the current section. */
8963 insn = PREV_INSN (insn);
8966 label = gen_label_rtx ();
8968 if (prev && NOTE_P (prev))
8969 prev = prev_nonnote_insn (prev);
8971 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8972 INSN_LOCATION (prev));
8974 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8975 barrier = emit_barrier_after (jump);
8976 insn = emit_label_after (label, barrier);
8977 JUMP_LABEL (jump) = label;
8978 LABEL_NUSES (label) = 1;
8980 INSN_ADDRESSES_NEW (jump, -1);
8981 INSN_ADDRESSES_NEW (barrier, -1);
8982 INSN_ADDRESSES_NEW (insn, -1);
8984 s390_end_pool (curr_pool, barrier);
8992 s390_end_pool (curr_pool, NULL);
8993 gcc_assert (!pending_ltrel);
8995 /* Find all labels that are branched into
8996 from an insn belonging to a different chunk. */
8998 far_labels = BITMAP_ALLOC (NULL);
9000 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9002 rtx_jump_table_data *table;
9004 /* Labels marked with LABEL_PRESERVE_P can be target
9005 of non-local jumps, so we have to mark them.
9006 The same holds for named labels.
9008 Don't do that, however, if it is the label before a jump table. */
9012 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9014 rtx_insn *vec_insn = NEXT_INSN (insn);
9015 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9016 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9018 /* Check potential targets in a table jump (casesi_jump). */
9019 else if (tablejump_p (insn, NULL, &table))
9021 rtx vec_pat = PATTERN (table);
9022 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9024 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9026 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9028 if (s390_find_pool (pool_list, label)
9029 != s390_find_pool (pool_list, insn))
9030 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9033 /* If we have a direct jump (conditional or unconditional),
9034 check all potential targets. */
9035 else if (JUMP_P (insn))
9037 rtx pat = PATTERN (insn);
9039 if (GET_CODE (pat) == PARALLEL)
9040 pat = XVECEXP (pat, 0, 0);
9042 if (GET_CODE (pat) == SET)
9044 rtx label = JUMP_LABEL (insn);
9045 if (label && !ANY_RETURN_P (label))
9047 if (s390_find_pool (pool_list, label)
9048 != s390_find_pool (pool_list, insn))
9049 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9055 /* Insert base register reload insns before every pool. */
9057 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9059 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9061 rtx_insn *insn = curr_pool->first_insn;
9062 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9065 /* Insert base register reload insns at every far label. */
9067 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9069 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9071 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9074 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9076 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9081 BITMAP_FREE (far_labels);
9084 /* Recompute insn addresses. */
9086 init_insn_lengths ();
9087 shorten_branches (get_insns ());
9092 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9093 After we have decided to use this list, finish implementing
9094 all changes to the current function as required. */
9097 s390_chunkify_finish (struct constant_pool *pool_list)
9099 struct constant_pool *curr_pool = NULL;
9103 /* Replace all literal pool references. */
9105 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9108 replace_ltrel_base (&PATTERN (insn));
9110 curr_pool = s390_find_pool (pool_list, insn);
9114 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9116 rtx addr, pool_ref = NULL_RTX;
9117 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9120 if (s390_execute_label (insn))
9121 addr = s390_find_execute (curr_pool, insn);
9123 addr = s390_find_constant (curr_pool,
9124 get_pool_constant (pool_ref),
9125 get_pool_mode (pool_ref));
9127 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9128 INSN_CODE (insn) = -1;
9133 /* Dump out all literal pools. */
9135 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9136 s390_dump_pool (curr_pool, 0);
9138 /* Free pool list. */
9142 struct constant_pool *next = pool_list->next;
9143 s390_free_pool (pool_list);
9148 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9149 We have decided we cannot use this list, so revert all changes
9150 to the current function that were done by s390_chunkify_start. */
9153 s390_chunkify_cancel (struct constant_pool *pool_list)
9155 struct constant_pool *curr_pool = NULL;
9158 /* Remove all pool placeholder insns. */
9160 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9162 /* Did we insert an extra barrier? Remove it. */
9163 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9164 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9165 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9167 if (jump && JUMP_P (jump)
9168 && barrier && BARRIER_P (barrier)
9169 && label && LABEL_P (label)
9170 && GET_CODE (PATTERN (jump)) == SET
9171 && SET_DEST (PATTERN (jump)) == pc_rtx
9172 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9173 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9176 remove_insn (barrier);
9177 remove_insn (label);
9180 remove_insn (curr_pool->pool_insn);
9183 /* Remove all base register reload insns. */
9185 for (insn = get_insns (); insn; )
9187 rtx_insn *next_insn = NEXT_INSN (insn);
9189 if (NONJUMP_INSN_P (insn)
9190 && GET_CODE (PATTERN (insn)) == SET
9191 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9192 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9198 /* Free pool list. */
9202 struct constant_pool *next = pool_list->next;
9203 s390_free_pool (pool_list);
9208 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9211 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9213 switch (GET_MODE_CLASS (mode))
9216 case MODE_DECIMAL_FLOAT:
9217 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9219 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9223 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9224 mark_symbol_refs_as_used (exp);
9227 case MODE_VECTOR_INT:
9228 case MODE_VECTOR_FLOAT:
9231 machine_mode inner_mode;
9232 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9234 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9235 for (i = 0; i < XVECLEN (exp, 0); i++)
9236 s390_output_pool_entry (XVECEXP (exp, 0, i),
9240 : GET_MODE_BITSIZE (inner_mode));
9250 /* Return an RTL expression representing the value of the return address
9251 for the frame COUNT steps up from the current frame. FRAME is the
9252 frame pointer of that frame. */
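/* Editorial note: this is what __builtin_return_address (COUNT) expands to;
   COUNT == 0 asks for the current function's return address.  */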
9255 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9260 /* Without backchain, we fail for all but the current frame. */
9262 if (!TARGET_BACKCHAIN && count > 0)
9265 /* For the current frame, we need to make sure the initial
9266 value of RETURN_REGNUM is actually saved. */
9270 /* On non-z architectures branch splitting could overwrite r14. */
9271 if (TARGET_CPU_ZARCH)
9272 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9275 cfun_frame_layout.save_return_addr_p = true;
9276 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9280 if (TARGET_PACKED_STACK)
9281 offset = -2 * UNITS_PER_LONG;
9283 offset = RETURN_REGNUM * UNITS_PER_LONG;
9285 addr = plus_constant (Pmode, frame, offset);
9286 addr = memory_address (Pmode, addr);
9287 return gen_rtx_MEM (Pmode, addr);
9290 /* Return an RTL expression representing the back chain stored in
9291 the current stack frame. */
9294 s390_back_chain_rtx (void)
9298 gcc_assert (TARGET_BACKCHAIN);
9300 if (TARGET_PACKED_STACK)
9301 chain = plus_constant (Pmode, stack_pointer_rtx,
9302 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9304 chain = stack_pointer_rtx;
9306 chain = gen_rtx_MEM (Pmode, chain);
9310 /* Find the first call-clobbered register unused in a function.
9311 This could be used as a base register in a leaf function
9312 or for holding the return address before the epilogue. */
9315 find_unused_clobbered_reg (void)
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return 0;
9325 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9326 clobbered hard regs in SETREG. */
9329 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9331 char *regs_ever_clobbered = (char *)data;
9332 unsigned int i, regno;
9333 machine_mode mode = GET_MODE (setreg);
9335 if (GET_CODE (setreg) == SUBREG)
9337 rtx inner = SUBREG_REG (setreg);
9338 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9340 regno = subreg_regno (setreg);
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  for (i = regno; i < regno + HARD_REGNO_NREGS (regno, mode); i++)
    regs_ever_clobbered[i] = 1;
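
/* Worked example (illustrative): a store to (reg:TI 6) on 64 bit
   covers HARD_REGNO_NREGS (6, TImode) == 2 hard registers, so the
   loop above marks regs_ever_clobbered[6] and [7]; a SUBREG whose
   inner register is neither a GPR nor an FPR is rejected before this
   point.  */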
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */
9359 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9365 memset (regs_ever_clobbered, 0, 32);
  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    for (i = 0; i < 32; i++)
      regs_ever_clobbered[i] = call_really_used_regs[i];
9375 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9376 this work is done by liveness analysis (mark_regs_live_at_end).
9377 Special care is needed for functions containing landing pads. Landing pads
9378 may use the eh registers, but the code which sets these registers is not
9379 contained in that function. Hence s390_regs_ever_clobbered is not able to
9380 deal with this automatically. */
9381 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9382 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9383 if (crtl->calls_eh_return
9384 || (cfun->machine->has_landing_pad_p
9385 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9386 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9388 /* For nonlocal gotos all call-saved registers have to be saved.
9389 This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
9392 if (crtl->saves_all_registers)
9393 for (i = 0; i < 32; i++)
9394 if (!call_really_used_regs[i])
9395 regs_ever_clobbered[i] = 1;
9397 FOR_EACH_BB_FN (cur_bb, cfun)
9399 FOR_BB_INSNS (cur_bb, cur_insn)
9403 if (!INSN_P (cur_insn))
9406 pat = PATTERN (cur_insn);
      /* Ignore GPR restore insns.  */
      if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	{
	  if (GET_CODE (pat) == SET
	      && GENERAL_REG_P (SET_DEST (pat)))
	    {
	      /* lgdr: a GPR restored from its FPR save slot.  */
	      if (GET_MODE (SET_SRC (pat)) == DImode
		  && FP_REG_P (SET_SRC (pat)))
		continue;

	      /* l / lg: a GPR restored from its stack slot.  */
	      if (GET_CODE (SET_SRC (pat)) == MEM)
		continue;
	    }

	  /* lm / lmg */
	  if (GET_CODE (pat) == PARALLEL
	      && load_multiple_operation (pat, VOIDmode))
	    continue;
	}

      note_stores (pat, s390_reg_clobbered_rtx, regs_ever_clobbered);
9437 /* Determine the frame area which actually has to be accessed
9438 in the function epilogue. The values are stored at the
9439 given pointers AREA_BOTTOM (address of the lowest used stack
9440 address) and AREA_TOP (address of the first item which does
9441 not belong to the stack frame). */
9444 s390_frame_area (int *area_bottom, int *area_top)
9451 if (cfun_frame_layout.first_restore_gpr != -1)
9453 b = (cfun_frame_layout.gprs_offset
9454 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9455 t = b + (cfun_frame_layout.last_restore_gpr
9456 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9459 if (TARGET_64BIT && cfun_save_high_fprs_p)
9461 b = MIN (b, cfun_frame_layout.f8_offset);
9462 t = MAX (t, (cfun_frame_layout.f8_offset
9463 + cfun_frame_layout.high_fprs * 8));
  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }

  *area_bottom = b;
  *area_top = t;
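
/* Worked example (illustrative 31-bit values): restoring r6..r15
   with gprs_offset == 24 gives
     b = 24 + 6 * 4	       == 48
     t = 48 + (15 - 6 + 1) * 4 == 88
   so the epilogue only has to address bytes 48..87 of the frame.  */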
9482 /* Update gpr_save_slots in the frame layout trying to make use of
9483 FPRs as GPR save slots.
9484 This is a helper routine of s390_register_info. */
9487 s390_register_info_gprtofpr ()
9489 int save_reg_slot = FPR0_REGNUM;
  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;
9495 for (i = 15; i >= 6; i--)
9497 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
9502 while ((!call_really_used_regs[save_reg_slot]
9503 || df_regs_ever_live_p (save_reg_slot)
9504 || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
9507 if (!FP_REGNO_P (save_reg_slot))
9509 /* We only want to use ldgr/lgdr if we can get rid of
9510 stm/lm entirely. So undo the gpr slot allocation in
9511 case we ran out of FPR save slots. */
9512 for (j = 6; j <= 15; j++)
9513 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9514 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9517 cfun_gpr_save_slot (i) = save_reg_slot++;
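
/* Resulting code sketch (illustrative): in a z10 leaf function that
   clobbers r6 and r7 but no call-clobbered FPRs, both GPRs receive
   FPR save slots, so the prologue copies them with ldgr and the
   epilogue recovers them with lgdr; no stm/lm pair and no stack
   slots are needed at all.  */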
/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */
9526 s390_register_info_stdarg_fpr ()
9532 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9533 f0-f4 for 64 bit. */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;
9540 min_fpr = crtl->args.info.fprs;
9541 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9542 if (max_fpr >= FP_ARG_NUM_REG)
9543 max_fpr = FP_ARG_NUM_REG - 1;
9545 /* FPR argument regs start at f0. */
9546 min_fpr += FPR0_REGNUM;
9547 max_fpr += FPR0_REGNUM;
9549 for (i = min_fpr; i <= max_fpr; i++)
9550 cfun_set_fpr_save (i);
/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */
9558 s390_register_info_stdarg_gpr ()
  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;
9569 min_gpr = crtl->args.info.gprs;
9570 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9571 if (max_gpr >= GP_ARG_NUM_REG)
9572 max_gpr = GP_ARG_NUM_REG - 1;
9574 /* GPR argument regs start at r2. */
9575 min_gpr += GPR2_REGNUM;
9576 max_gpr += GPR2_REGNUM;
9578 /* If r6 was supposed to be saved into an FPR and now needs to go to
9579 the stack for vararg we have to adjust the restore range to make
9580 sure that the restore is done from stack as well. */
9581 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9582 && min_gpr <= GPR6_REGNUM
9583 && max_gpr >= GPR6_REGNUM)
9585 if (cfun_frame_layout.first_restore_gpr == -1
9586 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9587 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9588 if (cfun_frame_layout.last_restore_gpr == -1
9589 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9590 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9593 if (cfun_frame_layout.first_save_gpr == -1
9594 || cfun_frame_layout.first_save_gpr > min_gpr)
9595 cfun_frame_layout.first_save_gpr = min_gpr;
9597 if (cfun_frame_layout.last_save_gpr == -1
9598 || cfun_frame_layout.last_save_gpr < max_gpr)
9599 cfun_frame_layout.last_save_gpr = max_gpr;
9601 for (i = min_gpr; i <= max_gpr; i++)
9602 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9605 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9606 prologue and epilogue. */
9609 s390_register_info_set_ranges ()
9613 /* Find the first and the last save slot supposed to use the stack
9614 to set the restore range.
9615 Vararg regs might be marked as save to stack but only the
9616 call-saved regs really need restoring (i.e. r6). This code
9617 assumes that the vararg regs have not yet been recorded in
9618 cfun_gpr_save_slot. */
9619 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9620 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9621 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9622 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9623 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9624 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
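
/* For example (illustrative): if only r7 and r12 are marked
   SAVE_SLOT_STACK, the two scans above stop at i == 7 and j == 12,
   so a single stm(g)/lm(g) covering r7..r12 is used even though the
   registers in between did not strictly need saving.  */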
9627 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9628 for registers which need to be saved in function prologue.
9629 This function can be used until the insns emitted for save/restore
9630 of the regs are visible in the RTL stream. */
9633 s390_register_info ()
9636 char clobbered_regs[32];
9638 gcc_assert (!epilogue_completed);
9640 if (reload_completed)
9641 /* After reload we rely on our own routine to determine which
9642 registers need saving. */
9643 s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);
9651 for (i = 0; i < 32; i++)
9652 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
9658 cfun_frame_layout.fpr_bitmap = 0;
9659 cfun_frame_layout.high_fprs = 0;
9660 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9661 if (clobbered_regs[i] && !call_really_used_regs[i])
9663 cfun_set_fpr_save (i);
9664 if (i >= FPR8_REGNUM)
9665 cfun_frame_layout.high_fprs++;
  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	|| (flag_split_stack && cfun->stdarg
	    && (crtl->is_leaf || TARGET_TPF_PROFILING
		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9676 clobbered_regs[BASE_REGNUM]
9677 |= (cfun->machine->base_reg
9678 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9680 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9681 |= !!frame_pointer_needed;
  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
9691 || cfun->machine->split_branches_pending_p
9692 || cfun_frame_layout.save_return_addr_p
9693 || crtl->calls_eh_return);
  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
9698 || cfun_save_high_fprs_p
9699 || get_frame_size () > 0
9700 || (reload_completed && cfun_frame_layout.frame_size > 0)
9701 || cfun->calls_alloca);
9703 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9705 for (i = 6; i < 16; i++)
9706 if (clobbered_regs[i])
9707 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9709 s390_register_info_stdarg_fpr ();
9710 s390_register_info_gprtofpr ();
9711 s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
9719 /* This function is called by s390_optimize_prologue in order to get
9720 rid of unnecessary GPR save/restore instructions. The register info
9721 for the GPRs is re-computed and the ranges are re-calculated. */
9724 s390_optimize_register_info ()
9726 char clobbered_regs[32];
9729 gcc_assert (epilogue_completed);
9730 gcc_assert (!cfun->machine->split_branches_pending_p);
9732 s390_regs_ever_clobbered (clobbered_regs);
9734 for (i = 0; i < 32; i++)
9735 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9737 /* There is still special treatment needed for cases invisible to
9738 s390_regs_ever_clobbered. */
9739 clobbered_regs[RETURN_REGNUM]
9740 |= (TARGET_TPF_PROFILING
9741 /* When expanding builtin_return_addr in ESA mode we do not
9742 know whether r14 will later be needed as scratch reg when
9743 doing branch splitting. So the builtin always accesses the
9744 r14 save slot and we need to stick to the save/restore
	 decision for r14 even if it turns out that it didn't get
	 used.  */
9747 || cfun_frame_layout.save_return_addr_p
9748 || crtl->calls_eh_return);
9750 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9752 for (i = 6; i < 16; i++)
9753 if (!clobbered_regs[i])
9754 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9756 s390_register_info_set_ranges ();
9757 s390_register_info_stdarg_gpr ();
9760 /* Fill cfun->machine with info about frame of current function. */
9763 s390_frame_info (void)
9765 HOST_WIDE_INT lowest_offset;
9767 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9768 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
9775 if (cfun->stdarg && cfun_save_arg_fprs_p)
9777 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9779 if (cfun_frame_layout.first_save_gpr_slot == -1)
9780 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9783 cfun_frame_layout.frame_size = get_frame_size ();
9784 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9785 fatal_error (input_location,
9786 "total size of local variables exceeds architecture limit");
9788 if (!TARGET_PACKED_STACK)
9790 /* Fixed stack layout. */
9791 cfun_frame_layout.backchain_offset = 0;
9792 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9793 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9794 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9795 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9798 else if (TARGET_BACKCHAIN)
9800 /* Kernel stack layout - packed stack, backchain, no float */
9801 gcc_assert (TARGET_SOFT_FLOAT);
9802 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9805 /* The distance between the backchain and the return address
9806 save slot must not change. So we always need a slot for the
9807 stack pointer which resides in between. */
9808 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9810 cfun_frame_layout.gprs_offset
9811 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9813 /* FPRs will not be saved. Nevertheless pick sane values to
9814 keep area calculations valid. */
9815 cfun_frame_layout.f0_offset =
9816 cfun_frame_layout.f4_offset =
9817 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9823 /* Packed stack layout without backchain. */
9825 /* With stdarg FPRs need their dedicated slots. */
9826 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9827 : (cfun_fpr_save_p (FPR4_REGNUM) +
9828 cfun_fpr_save_p (FPR6_REGNUM)));
9829 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9831 num_fprs = (cfun->stdarg ? 2
9832 : (cfun_fpr_save_p (FPR0_REGNUM)
9833 + cfun_fpr_save_p (FPR2_REGNUM)));
9834 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9836 cfun_frame_layout.gprs_offset
9837 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9839 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9840 - cfun_frame_layout.high_fprs * 8);
9843 if (cfun_save_high_fprs_p)
9844 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9847 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9849 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9850 sized area at the bottom of the stack. This is required also for
9851 leaf functions. When GCC generates a local stack reference it
9852 will always add STACK_POINTER_OFFSET to all these references. */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;
9859 /* Calculate the number of bytes we have used in our own register
9860 save area. With the packed stack layout we can re-use the
9861 remaining bytes for normal stack elements. */
9863 if (TARGET_PACKED_STACK)
9864 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9865 cfun_frame_layout.f4_offset),
9866 cfun_frame_layout.gprs_offset);
9870 if (TARGET_BACKCHAIN)
9871 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9873 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
9878 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9879 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9880 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
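
/* Worked example (illustrative): on 31 bit STACK_BOUNDARY is 64, so
   a raw frame size of 92 bytes is rounded as (92 + 7) & ~7 == 96,
   restoring the 8-byte alignment that an odd number of 4-byte GPR
   slots would otherwise break.  */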
9883 /* Generate frame layout. Fills in register and frame data for the current
9884 function in cfun->machine. This routine can be called multiple times;
9885 it will re-do the complete frame layout every time. */
9888 s390_init_frame_layout (void)
9890 HOST_WIDE_INT frame_size;
9893 /* After LRA the frame layout is supposed to be read-only and should
9894 not be re-computed. */
  if (reload_completed)
    return;
9898 /* On S/390 machines, we may need to perform branch splitting, which
9899 will require both base and return address register. We have no
9900 choice but to assume we're going to need them until right at the
9901 end of the machine dependent reorg phase. */
9902 if (!TARGET_CPU_ZARCH)
9903 cfun->machine->split_branches_pending_p = true;
9907 frame_size = cfun_frame_layout.frame_size;
9909 /* Try to predict whether we'll need the base register. */
9910 base_used = cfun->machine->split_branches_pending_p
9911 || crtl->uses_const_pool
9912 || (!DISP_IN_RANGE (frame_size)
9913 && !CONST_OK_FOR_K (frame_size));
9915 /* Decide which register to use as literal pool base. In small
9916 leaf functions, try to use an unused call-clobbered register
9917 as base register to avoid save/restore overhead. */
9919 cfun->machine->base_reg = NULL_RTX;
9925 /* Prefer r5 (most likely to be free). */
9926 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9928 cfun->machine->base_reg =
9929 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9932 s390_register_info ();
9935 while (frame_size != cfun_frame_layout.frame_size);
9938 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9939 the TX is nonescaping. A transaction is considered escaping if
9940 there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.
9943 The check so far has some limitations:
9944 - only single tbegin/tend BBs are supported
9945 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9946 - when CC is copied to a GPR and the CC0 check is done with the GPR
9947 this is not supported
9951 s390_optimize_nonescaping_tx (void)
9953 const unsigned int CC0 = 1 << 3;
9954 basic_block tbegin_bb = NULL;
9955 basic_block tend_bb = NULL;
9960 rtx_insn *tbegin_insn = NULL;
  if (!cfun->machine->tbegin_p)
    return;
9965 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9967 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9972 FOR_BB_INSNS (bb, insn)
9974 rtx ite, cc, pat, target;
9975 unsigned HOST_WIDE_INT mask;
9977 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9980 pat = PATTERN (insn);
9982 if (GET_CODE (pat) == PARALLEL)
9983 pat = XVECEXP (pat, 0, 0);
9985 if (GET_CODE (pat) != SET
9986 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9989 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9995 /* Just return if the tbegin doesn't have clobbers. */
9996 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9999 if (tbegin_bb != NULL)
10002 /* Find the next conditional jump. */
10003 for (tmp = NEXT_INSN (insn);
10005 tmp = NEXT_INSN (tmp))
10007 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10012 ite = SET_SRC (PATTERN (tmp));
10013 if (GET_CODE (ite) != IF_THEN_ELSE)
10016 cc = XEXP (XEXP (ite, 0), 0);
10017 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10018 || GET_MODE (cc) != CCRAWmode
10019 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10022 if (bb->succs->length () != 2)
10025 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10026 if (GET_CODE (XEXP (ite, 0)) == NE)
10030 target = XEXP (ite, 1);
10031 else if (mask == (CC0 ^ 0xf))
10032 target = XEXP (ite, 2);
10040 ei = ei_start (bb->succs);
10041 e1 = ei_safe_edge (ei);
10043 e2 = ei_safe_edge (ei);
10045 if (e2->flags & EDGE_FALLTHRU)
10048 e1 = ei_safe_edge (ei);
10051 if (!(e1->flags & EDGE_FALLTHRU))
10054 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10056 if (tmp == BB_END (bb))
10061 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10063 if (tend_bb != NULL)
  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;
10075 if (tbegin_bb == NULL || tend_bb == NULL)
10078 calculate_dominance_info (CDI_POST_DOMINATORS);
10079 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10080 free_dominance_info (CDI_POST_DOMINATORS);
10085 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10087 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10088 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10089 INSN_CODE (tbegin_insn) = -1;
10090 df_insn_rescan (tbegin_insn);
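
/* Shape of a qualifying transaction (source-level sketch for
   illustration; the checks above run on the expanded RTL):

     if (__builtin_tbegin (0) == 0)	// CC0: transaction started
       {
	 ...				// no path to function exit
	 __builtin_tend ();
       }

   Here tend_bb post-dominates the CC0 successor of the tbegin, so
   the FPR clobbers of the tbegin pattern can be dropped.  */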
10095 /* Return true if it is legal to put a value with MODE into REGNO. */
10098 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10100 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10103 switch (REGNO_REG_CLASS (regno))
10106 return ((GET_MODE_CLASS (mode) == MODE_INT
10107 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10109 || s390_vector_mode_supported_p (mode));
10113 && ((GET_MODE_CLASS (mode) == MODE_INT
10114 && s390_class_max_nregs (FP_REGS, mode) == 1)
10116 || s390_vector_mode_supported_p (mode)))
10119 if (REGNO_PAIR_OK (regno, mode))
10121 if (mode == SImode || mode == DImode)
10124 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10129 if (FRAME_REGNO_P (regno) && mode == Pmode)
10134 if (REGNO_PAIR_OK (regno, mode))
10137 || (mode != TFmode && mode != TCmode && mode != TDmode))
10142 if (GET_MODE_CLASS (mode) == MODE_CC)
10146 if (REGNO_PAIR_OK (regno, mode))
10148 if (mode == SImode || mode == Pmode)
10159 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10162 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10164 /* Once we've decided upon a register to use as base register, it must
10165 no longer be used for any other purpose. */
10166 if (cfun->machine->base_reg)
10167 if (REGNO (cfun->machine->base_reg) == old_reg
10168 || REGNO (cfun->machine->base_reg) == new_reg)
10171 /* Prevent regrename from using call-saved regs which haven't
10172 actually been saved. This is necessary since regrename assumes
10173 the backend save/restore decisions are based on
10174 df_regs_ever_live. Since we have our own routine we have to tell
10175 regrename manually about it. */
10176 if (GENERAL_REGNO_P (new_reg)
10177 && !call_really_used_regs[new_reg]
10178 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */
10188 s390_hard_regno_scratch_ok (unsigned int regno)
10190 /* See s390_hard_regno_rename_ok. */
10191 if (GENERAL_REGNO_P (regno)
10192 && !call_really_used_regs[regno]
10193 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10199 /* Maximum number of registers to represent a value of mode MODE
10200 in a register of class RCLASS. */
10203 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10206 bool reg_pair_required_p = false;
10212 reg_size = TARGET_VX ? 16 : 8;
  /* TF and TD modes would fit into a VR but we put them into a
     register pair since we do not have 128bit FP instructions on
     full VRs.  */
  if (TARGET_VX
      && SCALAR_FLOAT_MODE_P (mode)
10219 && GET_MODE_SIZE (mode) >= 16)
10220 reg_pair_required_p = true;
10222 /* Even if complex types would fit into a single FPR/VR we force
10223 them into a register pair to deal with the parts more easily.
10224 (FIXME: What about complex ints?) */
10225 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10226 reg_pair_required_p = true;
10232 reg_size = UNITS_PER_WORD;
10236 if (reg_pair_required_p)
10237 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10239 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
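
/* Worked example (illustrative): TFmode in FP_REGS forces a register
   pair, so with TARGET_VX (reg_size == 16) the result is
   2 * ((16 / 2 + 15) / 16) == 2, and without VX (reg_size == 8) it is
   2 * ((8 + 7) / 8) == 2, i.e. a pair of 8-byte FPRs.  */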
10242 /* Return TRUE if changing mode from FROM to TO should not be allowed
10243 for register class CLASS. */
10246 s390_cannot_change_mode_class (machine_mode from_mode,
10247 machine_mode to_mode,
10248 enum reg_class rclass)
10250 machine_mode small_mode;
10251 machine_mode big_mode;
10253 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10256 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10258 small_mode = from_mode;
10259 big_mode = to_mode;
10263 small_mode = to_mode;
10264 big_mode = from_mode;
10267 /* Values residing in VRs are little-endian style. All modes are
10268 placed left-aligned in an VR. This means that we cannot allow
10269 switching between modes with differing sizes. Also if the vector
10270 facility is available we still place TFmode values in VR register
10271 pairs, since the only instructions we have operating on TFmodes
10272 only deal with register pairs. Therefore we have to allow DFmode
10273 subregs of TFmodes to enable the TFmode splitters. */
10274 if (reg_classes_intersect_p (VEC_REGS, rclass)
10275 && (GET_MODE_SIZE (small_mode) < 8
10276 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10279 /* Likewise for access registers, since they have only half the
10280 word size on 64-bit. */
10281 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10287 /* Return true if we use LRA instead of reload pass. */
10291 return s390_lra_flag;
10294 /* Return true if register FROM can be eliminated via register TO. */
10297 s390_can_eliminate (const int from, const int to)
10299 /* On zSeries machines, we have not marked the base register as fixed.
10300 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10301 If a function requires the base register, we say here that this
10302 elimination cannot be performed. This will cause reload to free
10303 up the base register (as if it were fixed). On the other hand,
10304 if the current function does *not* require the base register, we
10305 say here the elimination succeeds, which in turn allows reload
10306 to allocate the base register for any other purpose. */
10307 if (from == BASE_REGNUM && to == BASE_REGNUM)
10309 if (TARGET_CPU_ZARCH)
10311 s390_init_frame_layout ();
10312 return cfun->machine->base_reg == NULL_RTX;
10318 /* Everything else must point into the stack frame. */
10319 gcc_assert (to == STACK_POINTER_REGNUM
10320 || to == HARD_FRAME_POINTER_REGNUM);
10322 gcc_assert (from == FRAME_POINTER_REGNUM
10323 || from == ARG_POINTER_REGNUM
10324 || from == RETURN_ADDRESS_POINTER_REGNUM);
10326 /* Make sure we actually saved the return address. */
10327 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10328 if (!crtl->calls_eh_return
10330 && !cfun_frame_layout.save_return_addr_p)
10336 /* Return offset between register FROM and TO initially after prolog. */
10339 s390_initial_elimination_offset (int from, int to)
10341 HOST_WIDE_INT offset;
10343 /* ??? Why are we called for non-eliminable pairs? */
10344 if (!s390_can_eliminate (from, to))
10349 case FRAME_POINTER_REGNUM:
10350 offset = (get_frame_size()
10351 + STACK_POINTER_OFFSET
10352 + crtl->outgoing_args_size);
10355 case ARG_POINTER_REGNUM:
10356 s390_init_frame_layout ();
10357 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10360 case RETURN_ADDRESS_POINTER_REGNUM:
10361 s390_init_frame_layout ();
10363 if (cfun_frame_layout.first_save_gpr_slot == -1)
	  /* If it turns out that for stdarg nothing went into the reg
	     save area we also do not need the return address
	     save slot.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    offset = 0;
	  else
	    gcc_unreachable ();
10374 /* In order to make the following work it is not necessary for
10375 r14 to have a save slot. It is sufficient if one other GPR
10376 got one. Since the GPRs are always stored without gaps we
     are able to calculate where the r14 save slot would
     reside.  */
10379 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10380 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10389 gcc_unreachable ();
10395 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10396 to register BASE. Return generated insn. */
10399 save_fpr (rtx base, int offset, int regnum)
10402 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10404 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10405 set_mem_alias_set (addr, get_varargs_alias_set ());
10407 set_mem_alias_set (addr, get_frame_alias_set ());
10409 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10412 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10413 to register BASE. Return generated insn. */
10416 restore_fpr (rtx base, int offset, int regnum)
10419 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10420 set_mem_alias_set (addr, get_frame_alias_set ());
10422 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored in any case.  */
10429 global_not_special_regno_p (int regno)
10431 return (global_regs[regno]
10432 /* These registers are special and need to be
10433 restored in any case. */
10434 && !(regno == STACK_POINTER_REGNUM
10435 || regno == RETURN_REGNUM
10436 || regno == BASE_REGNUM
10437 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10440 /* Generate insn to save registers FIRST to LAST into
10441 the register save area located at offset OFFSET
10442 relative to register BASE. */
10445 save_gprs (rtx base, int offset, int first, int last)
10447 rtx addr, insn, note;
10450 addr = plus_constant (Pmode, base, offset);
10451 addr = gen_rtx_MEM (Pmode, addr);
10453 set_mem_alias_set (addr, get_frame_alias_set ());
10455 /* Special-case single register. */
10459 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10461 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10463 if (!global_not_special_regno_p (first))
10464 RTX_FRAME_RELATED_P (insn) = 1;
10469 insn = gen_store_multiple (addr,
10470 gen_rtx_REG (Pmode, first),
10471 GEN_INT (last - first + 1));
10473 if (first <= 6 && cfun->stdarg)
10474 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10476 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10478 if (first + i <= 6)
10479 set_mem_alias_set (mem, get_varargs_alias_set ());
10482 /* We need to set the FRAME_RELATED flag on all SETs
10483 inside the store-multiple pattern.
	 However, we must not emit DWARF records for registers 2..5
	 if they are stored for use by variable arguments ...

	 ??? Unfortunately, it is not enough to simply not set the
	 FRAME_RELATED flags for those SETs, because the first SET
	 of the PARALLEL is always treated as if it had the flag
	 set, even if it does not.  Therefore we emit a new pattern
	 without those registers as REG_FRAME_RELATED_EXPR note.  */
10494 if (first >= 6 && !global_not_special_regno_p (first))
10496 rtx pat = PATTERN (insn);
10498 for (i = 0; i < XVECLEN (pat, 0); i++)
10499 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10500 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10502 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10504 RTX_FRAME_RELATED_P (insn) = 1;
10506 else if (last >= 6)
10510 for (start = first >= 6 ? first : 6; start <= last; start++)
10511 if (!global_not_special_regno_p (start))
10517 addr = plus_constant (Pmode, base,
10518 offset + (start - first) * UNITS_PER_LONG);
10523 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10524 gen_rtx_REG (Pmode, start));
10526 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10527 gen_rtx_REG (Pmode, start));
10528 note = PATTERN (note);
10530 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10531 RTX_FRAME_RELATED_P (insn) = 1;
10536 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10537 gen_rtx_REG (Pmode, start),
10538 GEN_INT (last - start + 1));
10539 note = PATTERN (note);
10541 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10543 for (i = 0; i < XVECLEN (note, 0); i++)
10544 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10545 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10547 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10549 RTX_FRAME_RELATED_P (insn) = 1;
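
/* Resulting code sketch (illustrative): for first == 2 and last == 15
   in a stdarg function a single
     stmg %r2,%r15,16(%r15)
   is emitted, while the attached REG_FRAME_RELATED_EXPR note only
   describes the stores of r6..r15, keeping the vararg stores of
   r2..r5 out of the unwind info.  */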
10555 /* Generate insn to restore registers FIRST to LAST from
10556 the register save area located at offset OFFSET
10557 relative to register BASE. */
10560 restore_gprs (rtx base, int offset, int first, int last)
10564 addr = plus_constant (Pmode, base, offset);
10565 addr = gen_rtx_MEM (Pmode, addr);
10566 set_mem_alias_set (addr, get_frame_alias_set ());
10568 /* Special-case single register. */
10572 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10574 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10576 RTX_FRAME_RELATED_P (insn) = 1;
10580 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10582 GEN_INT (last - first + 1));
10583 RTX_FRAME_RELATED_P (insn) = 1;
10587 /* Return insn sequence to load the GOT register. */
10589 static GTY(()) rtx got_symbol;
10591 s390_load_got (void)
  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);
10603 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10604 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10609 if (TARGET_CPU_ZARCH)
10611 emit_move_insn (got_rtx, got_symbol);
10617 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10618 UNSPEC_LTREL_OFFSET);
10619 offset = gen_rtx_CONST (Pmode, offset);
10620 offset = force_const_mem (Pmode, offset);
10622 emit_move_insn (got_rtx, offset);
10624 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10625 UNSPEC_LTREL_BASE);
10626 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10628 emit_move_insn (got_rtx, offset);
10631 insns = get_insns ();
10636 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10637 and the change to the stack pointer. */
10640 s390_emit_stack_tie (void)
10642 rtx mem = gen_frame_mem (BLKmode,
10643 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10645 emit_insn (gen_stack_tie (mem));
10648 /* Copy GPRS into FPR save slots. */
10651 s390_save_gprs_to_fprs (void)
  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;
10658 for (i = 6; i < 16; i++)
10660 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10663 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10664 gen_rtx_REG (DImode, i));
10665 RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  Doing
	     so it might emit def_cfa_register infos setting an FPR as
	     new CFA.  */
	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10674 /* Restore GPRs from FPR save slots. */
10677 s390_restore_gprs_from_fprs (void)
  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;
10684 for (i = 6; i < 16; i++)
10688 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10691 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10693 if (i == STACK_POINTER_REGNUM)
10694 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10696 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10698 df_set_regs_ever_live (i, true);
10699 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10700 if (i == STACK_POINTER_REGNUM)
10701 add_reg_note (insn, REG_CFA_DEF_CFA,
10702 plus_constant (Pmode, stack_pointer_rtx,
10703 STACK_POINTER_OFFSET));
10704 RTX_FRAME_RELATED_P (insn) = 1;
10709 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10714 const pass_data pass_data_s390_early_mach =
10716 RTL_PASS, /* type */
10717 "early_mach", /* name */
10718 OPTGROUP_NONE, /* optinfo_flags */
10719 TV_MACH_DEP, /* tv_id */
10720 0, /* properties_required */
10721 0, /* properties_provided */
10722 0, /* properties_destroyed */
10723 0, /* todo_flags_start */
10724 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10727 class pass_s390_early_mach : public rtl_opt_pass
10730 pass_s390_early_mach (gcc::context *ctxt)
10731 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10734 /* opt_pass methods: */
10735 virtual unsigned int execute (function *);
10737 }; // class pass_s390_early_mach
10740 pass_s390_early_mach::execute (function *fun)
10744 /* Try to get rid of the FPR clobbers. */
10745 s390_optimize_nonescaping_tx ();
10747 /* Re-compute register info. */
10748 s390_register_info ();
10750 /* If we're using a base register, ensure that it is always valid for
10751 the first non-prologue instruction. */
10752 if (fun->machine->base_reg)
10753 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10755 /* Annotate all constant pool references to let the scheduler know
10756 they implicitly use the base register. */
10757 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10760 annotate_constant_pool_refs (&PATTERN (insn));
10761 df_insn_rescan (insn);
10766 } // anon namespace
10768 /* Expand the prologue into a bunch of separate insns. */
10771 s390_emit_prologue (void)
10779 /* Choose best register to use for temp use within prologue.
10780 TPF with profiling must avoid the register 14 - the tracing function
10781 needs the original contents of r14 to be preserved. */
10783 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10785 && !TARGET_TPF_PROFILING)
10786 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10787 else if (flag_split_stack && cfun->stdarg)
10788 temp_reg = gen_rtx_REG (Pmode, 12);
10790 temp_reg = gen_rtx_REG (Pmode, 1);
10792 s390_save_gprs_to_fprs ();
10794 /* Save call saved gprs. */
10795 if (cfun_frame_layout.first_save_gpr != -1)
10797 insn = save_gprs (stack_pointer_rtx,
10798 cfun_frame_layout.gprs_offset +
10799 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10800 - cfun_frame_layout.first_save_gpr_slot),
10801 cfun_frame_layout.first_save_gpr,
10802 cfun_frame_layout.last_save_gpr);
10806 /* Dummy insn to mark literal pool slot. */
10808 if (cfun->machine->base_reg)
10809 emit_insn (gen_main_pool (cfun->machine->base_reg));
10811 offset = cfun_frame_layout.f0_offset;
10813 /* Save f0 and f2. */
10814 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10816 if (cfun_fpr_save_p (i))
10818 save_fpr (stack_pointer_rtx, offset, i);
10821 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10825 /* Save f4 and f6. */
10826 offset = cfun_frame_layout.f4_offset;
10827 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10829 if (cfun_fpr_save_p (i))
10831 insn = save_fpr (stack_pointer_rtx, offset, i);
10834 /* If f4 and f6 are call clobbered they are saved due to
10835 stdargs and therefore are not frame related. */
10836 if (!call_really_used_regs[i])
10837 RTX_FRAME_RELATED_P (insn) = 1;
10839 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10843 if (TARGET_PACKED_STACK
10844 && cfun_save_high_fprs_p
10845 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10847 offset = (cfun_frame_layout.f8_offset
10848 + (cfun_frame_layout.high_fprs - 1) * 8);
10850 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10851 if (cfun_fpr_save_p (i))
10853 insn = save_fpr (stack_pointer_rtx, offset, i);
10855 RTX_FRAME_RELATED_P (insn) = 1;
      if (offset >= cfun_frame_layout.f8_offset)
	next_fpr = i;
10862 if (!TARGET_PACKED_STACK)
10863 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10865 if (flag_stack_usage_info)
10866 current_function_static_stack_size = cfun_frame_layout.frame_size;
10868 /* Decrement stack pointer. */
10870 if (cfun_frame_layout.frame_size > 0)
10872 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10873 rtx real_frame_off;
10875 if (s390_stack_size)
10877 HOST_WIDE_INT stack_guard;
10879 if (s390_stack_guard)
10880 stack_guard = s390_stack_guard;
10883 /* If no value for stack guard is provided the smallest power of 2
10884 larger than the current frame size is chosen. */
10886 while (stack_guard < cfun_frame_layout.frame_size)
10890 if (cfun_frame_layout.frame_size >= s390_stack_size)
	  warning (0, "frame size of function %qs is %wd"
		   " bytes exceeding user provided stack limit of "
		   "%d bytes.  "
		   "An unconditional trap is added.",
		   current_function_name (), cfun_frame_layout.frame_size,
		   s390_stack_size);
	  emit_insn (gen_trap ());
10903 /* stack_guard has to be smaller than s390_stack_size.
10904 Otherwise we would emit an AND with zero which would
10905 not match the test under mask pattern. */
10906 if (stack_guard >= s390_stack_size)
10908 warning (0, "frame size of function %qs is %wd"
10909 " bytes which is more than half the stack size. "
10910 "The dynamic check would not be reliable. "
10911 "No check emitted for this function.",
10912 current_function_name(),
10913 cfun_frame_layout.frame_size);
10917 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10918 & ~(stack_guard - 1));
10920 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10921 GEN_INT (stack_check_mask));
	  if (TARGET_64BIT)
	    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
						 t, const0_rtx),
				     t, const0_rtx, const0_rtx));
	  else
	    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
						 t, const0_rtx),
				     t, const0_rtx, const0_rtx));
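
/* Worked example (illustrative): with -mstack-size=65536 and
   -mstack-guard=4096 the mask is 0xffff & ~0x0fff == 0xf000, so the
   compare-and-trap fires exactly when %r15 lies within the lowest
   4 KiB guard band of its 64 KiB stack segment.  */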
10934 if (s390_warn_framesize > 0
10935 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10936 warning (0, "frame size of %qs is %wd bytes",
10937 current_function_name (), cfun_frame_layout.frame_size);
10939 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10940 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10942 /* Save incoming stack pointer into temp reg. */
10943 if (TARGET_BACKCHAIN || next_fpr)
10944 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10946 /* Subtract frame size from stack pointer. */
10948 if (DISP_IN_RANGE (INTVAL (frame_off)))
10950 insn = gen_rtx_SET (stack_pointer_rtx,
10951 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10953 insn = emit_insn (insn);
10957 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10958 frame_off = force_const_mem (Pmode, frame_off);
10960 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10961 annotate_constant_pool_refs (&PATTERN (insn));
10964 RTX_FRAME_RELATED_P (insn) = 1;
10965 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10966 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10967 gen_rtx_SET (stack_pointer_rtx,
10968 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10971 /* Set backchain. */
10973 if (TARGET_BACKCHAIN)
10975 if (cfun_frame_layout.backchain_offset)
10976 addr = gen_rtx_MEM (Pmode,
10977 plus_constant (Pmode, stack_pointer_rtx,
10978 cfun_frame_layout.backchain_offset));
10980 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10981 set_mem_alias_set (addr, get_frame_alias_set ());
10982 insn = emit_insn (gen_move_insn (addr, temp_reg));
10985 /* If we support non-call exceptions (e.g. for Java),
10986 we need to make sure the backchain pointer is set up
10987 before any possibly trapping memory access. */
10988 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10990 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10991 emit_clobber (addr);
10995 /* Save fprs 8 - 15 (64 bit ABI). */
10997 if (cfun_save_high_fprs_p && next_fpr)
10999 /* If the stack might be accessed through a different register
11000 we have to make sure that the stack pointer decrement is not
11001 moved below the use of the stack slots. */
11002 s390_emit_stack_tie ();
11004 insn = emit_insn (gen_add2_insn (temp_reg,
11005 GEN_INT (cfun_frame_layout.f8_offset)));
11009 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11010 if (cfun_fpr_save_p (i))
11012 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11013 cfun_frame_layout.frame_size
11014 + cfun_frame_layout.f8_offset
11017 insn = save_fpr (temp_reg, offset, i);
11019 RTX_FRAME_RELATED_P (insn) = 1;
11020 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11021 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11022 gen_rtx_REG (DFmode, i)));
11026 /* Set frame pointer, if needed. */
11028 if (frame_pointer_needed)
11030 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11031 RTX_FRAME_RELATED_P (insn) = 1;
11034 /* Set up got pointer, if needed. */
11036 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11038 rtx_insn *insns = s390_load_got ();
11040 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11041 annotate_constant_pool_refs (&PATTERN (insn));
11046 if (TARGET_TPF_PROFILING)
11048 /* Generate a BAS instruction to serve as a function
11049 entry intercept to facilitate the use of tracing
11050 algorithms located at the branch target. */
11051 emit_insn (gen_prologue_tpf ());
11053 /* Emit a blockage here so that all code
11054 lies between the profiling mechanisms. */
11055 emit_insn (gen_blockage ());
11059 /* Expand the epilogue into a bunch of separate insns. */
11062 s390_emit_epilogue (bool sibcall)
11064 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11065 int area_bottom, area_top, offset = 0;
11070 if (TARGET_TPF_PROFILING)
11073 /* Generate a BAS instruction to serve as a function
11074 entry intercept to facilitate the use of tracing
11075 algorithms located at the branch target. */
11077 /* Emit a blockage here so that all code
11078 lies between the profiling mechanisms. */
11079 emit_insn (gen_blockage ());
11081 emit_insn (gen_epilogue_tpf ());
11084 /* Check whether to use frame or stack pointer for restore. */
11086 frame_pointer = (frame_pointer_needed
11087 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11089 s390_frame_area (&area_bottom, &area_top);
11091 /* Check whether we can access the register save area.
11092 If not, increment the frame pointer as required. */
11094 if (area_top <= area_bottom)
11096 /* Nothing to restore. */
11098 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11099 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11101 /* Area is in range. */
11102 offset = cfun_frame_layout.frame_size;
11106 rtx insn, frame_off, cfa;
11108 offset = area_bottom < 0 ? -area_bottom : 0;
11109 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11111 cfa = gen_rtx_SET (frame_pointer,
11112 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11113 if (DISP_IN_RANGE (INTVAL (frame_off)))
11115 insn = gen_rtx_SET (frame_pointer,
11116 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11117 insn = emit_insn (insn);
11121 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11122 frame_off = force_const_mem (Pmode, frame_off);
11124 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11125 annotate_constant_pool_refs (&PATTERN (insn));
11127 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11128 RTX_FRAME_RELATED_P (insn) = 1;
11131 /* Restore call saved fprs. */
11135 if (cfun_save_high_fprs_p)
11137 next_offset = cfun_frame_layout.f8_offset;
11138 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11140 if (cfun_fpr_save_p (i))
11142 restore_fpr (frame_pointer,
11143 offset + next_offset, i);
11145 = alloc_reg_note (REG_CFA_RESTORE,
11146 gen_rtx_REG (DFmode, i), cfa_restores);
11155 next_offset = cfun_frame_layout.f4_offset;
11157 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11159 if (cfun_fpr_save_p (i))
11161 restore_fpr (frame_pointer,
11162 offset + next_offset, i);
11164 = alloc_reg_note (REG_CFA_RESTORE,
11165 gen_rtx_REG (DFmode, i), cfa_restores);
11168 else if (!TARGET_PACKED_STACK)
11174 /* Return register. */
11176 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11178 /* Restore call saved gprs. */
11180 if (cfun_frame_layout.first_restore_gpr != -1)
11185 /* Check for global register and save them
11186 to stack location from where they get restored. */
11188 for (i = cfun_frame_layout.first_restore_gpr;
11189 i <= cfun_frame_layout.last_restore_gpr;
11192 if (global_not_special_regno_p (i))
11194 addr = plus_constant (Pmode, frame_pointer,
11195 offset + cfun_frame_layout.gprs_offset
11196 + (i - cfun_frame_layout.first_save_gpr_slot)
11198 addr = gen_rtx_MEM (Pmode, addr);
11199 set_mem_alias_set (addr, get_frame_alias_set ());
11200 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11204 = alloc_reg_note (REG_CFA_RESTORE,
11205 gen_rtx_REG (Pmode, i), cfa_restores);
      /* Fetch return address from stack before load multiple;
	 this helps scheduling.
11213 Only do this if we already decided that r14 needs to be
11214 saved to a stack slot. (And not just because r14 happens to
11215 be in between two GPRs which need saving.) Otherwise it
11216 would be difficult to take that decision back in
11217 s390_optimize_prologue. */
11218 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
	  int return_regnum = find_unused_clobbered_reg ();
	  if (!return_regnum)
	    return_regnum = 4;
	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11225 addr = plus_constant (Pmode, frame_pointer,
11226 offset + cfun_frame_layout.gprs_offset
11228 - cfun_frame_layout.first_save_gpr_slot)
11230 addr = gen_rtx_MEM (Pmode, addr);
11231 set_mem_alias_set (addr, get_frame_alias_set ());
11232 emit_move_insn (return_reg, addr);
11234 /* Once we did that optimization we have to make sure
11235 s390_optimize_prologue does not try to remove the
11236 store of r14 since we will not be able to find the
11237 load issued here. */
11238 cfun_frame_layout.save_return_addr_p = true;
11242 insn = restore_gprs (frame_pointer,
11243 offset + cfun_frame_layout.gprs_offset
11244 + (cfun_frame_layout.first_restore_gpr
11245 - cfun_frame_layout.first_save_gpr_slot)
11247 cfun_frame_layout.first_restore_gpr,
11248 cfun_frame_layout.last_restore_gpr);
11249 insn = emit_insn (insn);
11250 REG_NOTES (insn) = cfa_restores;
11251 add_reg_note (insn, REG_CFA_DEF_CFA,
11252 plus_constant (Pmode, stack_pointer_rtx,
11253 STACK_POINTER_OFFSET));
11254 RTX_FRAME_RELATED_P (insn) = 1;
11257 s390_restore_gprs_from_fprs ();
11262 /* Return to caller. */
11264 p = rtvec_alloc (2);
11266 RTVEC_ELT (p, 0) = ret_rtx;
11267 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11268 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11272 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11275 s300_set_up_by_prologue (hard_reg_set_container *regs)
11277 if (cfun->machine->base_reg
11278 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11279 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11282 /* -fsplit-stack support. */
11284 /* A SYMBOL_REF for __morestack. */
11285 static GTY(()) rtx morestack_ref;
11287 /* When using -fsplit-stack, the allocation routines set a field in
11288 the TCB to the bottom of the stack plus this much space, measured
11291 #define SPLIT_STACK_AVAILABLE 1024
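
/* Illustrative consequence: a frame of, say, 200 bytes may compare
   %r15 against __private_ss directly in the check below, since
   overshooting the guard by less than SPLIT_STACK_AVAILABLE bytes is
   safe by construction; only larger frames add frame_size first.  */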
11293 /* Emit -fsplit-stack prologue, which goes before the regular function
11297 s390_expand_split_stack_prologue (void)
11299 rtx r1, guard, cc = NULL;
11301 /* Offset from thread pointer to __private_ss. */
11302 int psso = TARGET_64BIT ? 0x38 : 0x20;
11303 /* Pointer size in bytes. */
11304 /* Frame size and argument size - the two parameters to __morestack. */
11305 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11306 /* Align argument size to 8 bytes - simplifies __morestack code. */
  HOST_WIDE_INT args_size = crtl->args.size >= 0
			    ? ((crtl->args.size + 7) & ~7)
			    : 0;
11310 /* Label to be called by __morestack. */
11311 rtx_code_label *call_done = NULL;
11312 rtx_code_label *parm_base = NULL;
11315 gcc_assert (flag_split_stack && reload_completed);
11316 if (!TARGET_CPU_ZARCH)
11318 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11322 r1 = gen_rtx_REG (Pmode, 1);
  /* If no stack frame will be allocated, don't do anything.  */
  if (!frame_size)
    return;
11327 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11329 /* If va_start is used, just use r15. */
11330 emit_move_insn (r1,
11331 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11332 GEN_INT (STACK_POINTER_OFFSET)));
11338 if (morestack_ref == NULL_RTX)
11340 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11341 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11342 | SYMBOL_FLAG_FUNCTION);
11345 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11347 /* If frame_size will fit in an add instruction, do a stack space
11348 check, and only call __morestack if there's not enough space. */
11350 /* Get thread pointer. r1 is the only register we can always destroy - r0
11351 could contain a static chain (and cannot be used to address memory
11352 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11353 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11354 /* Aim at __private_ss. */
11355 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
      /* If less than 1 KiB is used, skip the addition and compare
	 directly with the cached TCB value.  */
11359 if (frame_size > SPLIT_STACK_AVAILABLE)
	  emit_move_insn (r1, guard);
	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
	  else
	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
	  guard = r1;
11369 /* Compare the (maybe adjusted) guard with the stack pointer. */
11370 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11373 call_done = gen_label_rtx ();
11374 parm_base = gen_label_rtx ();
11376 /* Emit the parameter block. */
11377 tmp = gen_split_stack_data (parm_base, call_done,
11378 GEN_INT (frame_size),
11379 GEN_INT (args_size));
11380 insn = emit_insn (tmp);
11381 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11382 LABEL_NUSES (call_done)++;
11383 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11384 LABEL_NUSES (parm_base)++;
11386 /* %r1 = litbase. */
11387 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11388 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11389 LABEL_NUSES (parm_base)++;
11391 /* Now, we need to call __morestack. It has very special calling
11392 conventions: it preserves param/return/static chain registers for
11393 calling main function body, and looks for its own parameters at %r1. */
11397 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11399 insn = emit_jump_insn (tmp);
11400 JUMP_LABEL (insn) = call_done;
11401 LABEL_NUSES (call_done)++;
11403 /* Mark the jump as very unlikely to be taken. */
11404 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11406 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
      /* If va_start is used, and __morestack was not called, just use
	 r15.  */
11410 emit_move_insn (r1,
11411 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11412 GEN_INT (STACK_POINTER_OFFSET)));
11417 tmp = gen_split_stack_call (morestack_ref, call_done);
11418 insn = emit_jump_insn (tmp);
11419 JUMP_LABEL (insn) = call_done;
11420 LABEL_NUSES (call_done)++;
11424 /* __morestack will call us here. */
11426 emit_label (call_done);
11429 /* We may have to tell the dataflow pass that the split stack prologue
11430 is initializing a register. */
11433 s390_live_on_entry (bitmap regs)
11435 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11437 gcc_assert (flag_split_stack);
11438 bitmap_set_bit (regs, 1);
/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */
11447 s390_can_use_simple_return_insn (void)
11452 /* Return true if the epilogue is guaranteed to contain only a return
11453 instruction and if a direct return can therefore be used instead.
11454 One of the main advantages of using direct return instructions
11455 is that we can then use conditional returns. */
11458 s390_can_use_return_insn (void)
  if (!reload_completed)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;
11475 /* For 31 bit this is not covered by the frame_size check below
11476 since f4, f6 are saved in the register save area without needing
11477 additional stack space. */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */
11492 static const char *
11493 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11495 return ((TARGET_VX_ABI
11497 && VECTOR_TYPE_P (TREE_TYPE (val))
11498 && (funcdecl == NULL_TREE
11499 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11500 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11501 ? N_("Vector argument passed to unprototyped function")
11506 /* Return the size in bytes of a function argument of
11507 type TYPE and/or mode MODE. At least one of TYPE or
11508 MODE must be specified. */
11511 s390_function_arg_size (machine_mode mode, const_tree type)
11514 return int_size_in_bytes (type);
11516 /* No type info available for some library calls ... */
11517 if (mode != BLKmode)
11518 return GET_MODE_SIZE (mode);
11520 /* If we have neither type nor mode, abort.  */
11521 gcc_unreachable ();
11524 /* Return true if a function argument of type TYPE and mode MODE
11525 is to be passed in a vector register, if available. */
11528 s390_function_arg_vector (machine_mode mode, const_tree type)
11530 if (!TARGET_VX_ABI)
11533 if (s390_function_arg_size (mode, type) > 16)
11536 /* No type info available for some library calls ... */
11538 return VECTOR_MODE_P (mode);
11540 /* The ABI says that record types with a single member are treated
11541 just like that member would be. */
11542 while (TREE_CODE (type) == RECORD_TYPE)
11544 tree field, single = NULL_TREE;
11546 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11548 if (TREE_CODE (field) != FIELD_DECL)
11551 if (single == NULL_TREE)
11552 single = TREE_TYPE (field);
11557 if (single == NULL_TREE)
11561 /* If the field declaration adds extra byte due to
11562 e.g. padding this is not accepted as vector type. */
11563 if (int_size_in_bytes (single) <= 0
11564 || int_size_in_bytes (single) != int_size_in_bytes (type))
11570 return VECTOR_TYPE_P (type);
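/* A hedged example of the single-member rule above (hypothetical
   types): given

     typedef double v2df __attribute__ ((vector_size (16)));
     struct wrapped { v2df v; };

   struct wrapped is passed exactly like a plain v2df, i.e. in a
   vector register, while a struct with a second member or trailing
   padding is not.  */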
11573 /* Return true if a function argument of type TYPE and mode MODE
11574 is to be passed in a floating-point register, if available. */
11577 s390_function_arg_float (machine_mode mode, const_tree type)
11579 if (s390_function_arg_size (mode, type) > 8)
11582 /* Soft-float changes the ABI: no floating-point registers are used. */
11583 if (TARGET_SOFT_FLOAT)
11586 /* No type info available for some library calls ... */
11588 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11590 /* The ABI says that record types with a single member are treated
11591 just like that member would be. */
11592 while (TREE_CODE (type) == RECORD_TYPE)
11594 tree field, single = NULL_TREE;
11596 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11598 if (TREE_CODE (field) != FIELD_DECL)
11601 if (single == NULL_TREE)
11602 single = TREE_TYPE (field);
11607 if (single == NULL_TREE)
11613 return TREE_CODE (type) == REAL_TYPE;
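/* Hedged examples (hypothetical types): struct s { double d; } is
   passed like a plain double, i.e. in an FPR, whereas
   struct t { double d; char c; } is not; the second member and the
   resulting padding make int_size_in_bytes differ.  */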
11616 /* Return true if a function argument of type TYPE and mode MODE
11617 is to be passed in an integer register, or a pair of integer
11618 registers, if available. */
11621 s390_function_arg_integer (machine_mode mode, const_tree type)
11623 int size = s390_function_arg_size (mode, type);
11627 /* No type info available for some library calls ... */
11629 return GET_MODE_CLASS (mode) == MODE_INT
11630 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11632 /* We accept small integral (and similar) types. */
11633 if (INTEGRAL_TYPE_P (type)
11634 || POINTER_TYPE_P (type)
11635 || TREE_CODE (type) == NULLPTR_TYPE
11636 || TREE_CODE (type) == OFFSET_TYPE
11637 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11640 /* We also accept structs of size 1, 2, 4, 8 that are not
11641 passed in floating-point registers. */
11642 if (AGGREGATE_TYPE_P (type)
11643 && exact_log2 (size) >= 0
11644 && !s390_function_arg_float (mode, type))
11650 /* Return 1 if a function argument of type TYPE and mode MODE
11651 is to be passed by reference. The ABI specifies that only
11652 structures of size 1, 2, 4, or 8 bytes are passed by value,
11653 all other structures (and complex numbers) are passed by
11657 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11658 machine_mode mode, const_tree type,
11659 bool named ATTRIBUTE_UNUSED)
11661 int size = s390_function_arg_size (mode, type);
11663 if (s390_function_arg_vector (mode, type))
11671 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11674 if (TREE_CODE (type) == COMPLEX_TYPE
11675 || TREE_CODE (type) == VECTOR_TYPE)
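/* Hedged examples for the two rules above (hypothetical types):
   struct { char c[3]; } (size 3, not a power of two) and
   _Complex double are both passed by reference; the caller creates a
   copy and passes its address in a GPR.  */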
11682 /* Update the data in CUM to advance over an argument of mode MODE and
11683 data type TYPE. (TYPE is null for libcalls where that information
11684 may not be available.). The boolean NAMED specifies whether the
11685 argument is a named argument (as opposed to an unnamed argument
11686 matching an ellipsis). */
11689 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11690 const_tree type, bool named)
11692 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11694 if (s390_function_arg_vector (mode, type))
11696 /* We are called for unnamed vector stdarg arguments which are
11697 passed on the stack. In this case this hook does not have to
11698 do anything since stack arguments are tracked by common
11704 else if (s390_function_arg_float (mode, type))
11708 else if (s390_function_arg_integer (mode, type))
11710 int size = s390_function_arg_size (mode, type);
11711 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11714 gcc_unreachable ();
11717 /* Define where to put the arguments to a function.
11718 Value is zero to push the argument on the stack,
11719 or a hard register in which to store the argument.
11721 MODE is the argument's machine mode.
11722 TYPE is the data type of the argument (as a tree).
11723 This is null for libcalls where that information may
11725 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11726 the preceding args and about the function being called.
11727 NAMED is nonzero if this argument is a named parameter
11728 (otherwise it is an extra parameter matching an ellipsis).
11730 On S/390, we use general purpose registers 2 through 6 to
11731 pass integer, pointer, and certain structure arguments, and
11732 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11733 to pass floating point arguments. All remaining arguments
11734 are pushed to the stack. */
11737 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11738 const_tree type, bool named)
11740 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11743 s390_check_type_for_vector_abi (type, true, false);
11745 if (s390_function_arg_vector (mode, type))
11747 /* Vector arguments being part of the ellipsis are passed on the
11749 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11752 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11754 else if (s390_function_arg_float (mode, type))
11756 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11759 return gen_rtx_REG (mode, cum->fprs + 16);
11761 else if (s390_function_arg_integer (mode, type))
11763 int size = s390_function_arg_size (mode, type);
11764 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11766 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11768 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11769 return gen_rtx_REG (mode, cum->gprs + 2);
11770 else if (n_gprs == 2)
11772 rtvec p = rtvec_alloc (2);
11775 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11778 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11781 return gen_rtx_PARALLEL (mode, p);
11785 /* After the real arguments, expand_call calls us once again
11786 with a void_type_node type. Whatever we return here is
11787 passed as operand 2 to the call expanders.
11789 We don't need this feature ... */
11790 else if (type == void_type_node)
11793 gcc_unreachable ();
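/* A hedged worked example of the conventions above (64 bit, hard
   float, hypothetical prototype): for

     void f (int a, double b, long c, double d);

   the assignment is a -> %r2, b -> %f0, c -> %r3, d -> %f2; further
   integer arguments would still fit into %r4-%r6, anything beyond
   that goes to the stack.  */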
11796 /* Return true if return values of type TYPE should be returned
11797 in a memory buffer whose address is passed by the caller as
11798 hidden first argument. */
11801 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11803 /* We accept small integral (and similar) types. */
11804 if (INTEGRAL_TYPE_P (type)
11805 || POINTER_TYPE_P (type)
11806 || TREE_CODE (type) == OFFSET_TYPE
11807 || TREE_CODE (type) == REAL_TYPE)
11808 return int_size_in_bytes (type) > 8;
11810 /* vector types which fit into a VR. */
11812 && VECTOR_TYPE_P (type)
11813 && int_size_in_bytes (type) <= 16)
11816 /* Aggregates and similar constructs are always returned
11818 if (AGGREGATE_TYPE_P (type)
11819 || TREE_CODE (type) == COMPLEX_TYPE
11820 || VECTOR_TYPE_P (type))
11823 /* ??? We get called on all sorts of random stuff from
11824 aggregate_value_p. We can't abort, but it's not clear
11825 what's safe to return. Pretend it's a struct I guess. */
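/* Hedged examples: long double (16 bytes) and any aggregate are
   returned via the hidden first argument, while int, pointers,
   double and, under the VX ABI, vectors of up to 16 bytes come back
   in registers.  */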
11829 /* Function arguments and return values are promoted to word size. */
11831 static machine_mode
11832 s390_promote_function_mode (const_tree type, machine_mode mode,
11834 const_tree fntype ATTRIBUTE_UNUSED,
11835 int for_return ATTRIBUTE_UNUSED)
11837 if (INTEGRAL_MODE_P (mode)
11838 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11840 if (type != NULL_TREE && POINTER_TYPE_P (type))
11841 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11848 /* Define where to return a (scalar) value of type RET_TYPE.
11849 If RET_TYPE is null, define where to return a (scalar)
11850 value of mode MODE from a libcall. */
11853 s390_function_and_libcall_value (machine_mode mode,
11854 const_tree ret_type,
11855 const_tree fntype_or_decl,
11856 bool outgoing ATTRIBUTE_UNUSED)
11858 /* For vector return types it is important to use the RET_TYPE
11859 argument whenever available since the middle-end might have
11860 changed the mode to a scalar mode. */
11861 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11862 || (!ret_type && VECTOR_MODE_P (mode)));
11864 /* For normal functions perform the promotion as
11865 promote_function_mode would do. */
11868 int unsignedp = TYPE_UNSIGNED (ret_type);
11869 mode = promote_function_mode (ret_type, mode, &unsignedp,
11870 fntype_or_decl, 1);
11873 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11874 || SCALAR_FLOAT_MODE_P (mode)
11875 || (TARGET_VX_ABI && vector_ret_type_p));
11876 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11878 if (TARGET_VX_ABI && vector_ret_type_p)
11879 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11880 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11881 return gen_rtx_REG (mode, 16);
11882 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11883 || UNITS_PER_LONG == UNITS_PER_WORD)
11884 return gen_rtx_REG (mode, 2);
11885 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11887 /* This case is triggered when returning a 64 bit value with
11888 -m31 -mzarch. Although the value would fit into a single
11889 register it has to be forced into a 32 bit register pair in
11890 order to match the ABI. */
11891 rtvec p = rtvec_alloc (2);
11894 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11896 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11898 return gen_rtx_PARALLEL (mode, p);
11901 gcc_unreachable ();
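/* A hedged illustration of the PARALLEL above: for a long long
   return value with -m31 -mzarch, bits 0-31 are described as living
   in %r2 (offset 0) and bits 32-63 in %r3 (offset 4), matching the
   31 bit ABI's register pair.  */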
11904 /* Define where to return a scalar return value of type RET_TYPE. */
11907 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11910 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11911 fn_decl_or_type, outgoing);
11914 /* Define where to return a scalar libcall return value of mode
11918 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11920 return s390_function_and_libcall_value (mode, NULL_TREE,
11925 /* Create and return the va_list datatype.
11927 On S/390, va_list is an array type equivalent to
11929 typedef struct __va_list_tag
11930 {
11931 long __gpr;
11932 long __fpr;
11933 void *__overflow_arg_area;
11934 void *__reg_save_area;
11935 } va_list[1];
11937 where __gpr and __fpr hold the number of general purpose
11938 or floating point arguments used up to now, respectively,
11939 __overflow_arg_area points to the stack location of the
11940 next argument passed on the stack, and __reg_save_area
11941 always points to the start of the register area in the
11942 call frame of the current function. The function prologue
11943 saves all registers used for argument passing into this
11944 area if the function uses variable arguments. */
11947 s390_build_builtin_va_list (void)
11949 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11951 record = lang_hooks.types.make_type (RECORD_TYPE);
11954 build_decl (BUILTINS_LOCATION,
11955 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11957 f_gpr = build_decl (BUILTINS_LOCATION,
11958 FIELD_DECL, get_identifier ("__gpr"),
11959 long_integer_type_node);
11960 f_fpr = build_decl (BUILTINS_LOCATION,
11961 FIELD_DECL, get_identifier ("__fpr"),
11962 long_integer_type_node);
11963 f_ovf = build_decl (BUILTINS_LOCATION,
11964 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11966 f_sav = build_decl (BUILTINS_LOCATION,
11967 FIELD_DECL, get_identifier ("__reg_save_area"),
11970 va_list_gpr_counter_field = f_gpr;
11971 va_list_fpr_counter_field = f_fpr;
11973 DECL_FIELD_CONTEXT (f_gpr) = record;
11974 DECL_FIELD_CONTEXT (f_fpr) = record;
11975 DECL_FIELD_CONTEXT (f_ovf) = record;
11976 DECL_FIELD_CONTEXT (f_sav) = record;
11978 TYPE_STUB_DECL (record) = type_decl;
11979 TYPE_NAME (record) = type_decl;
11980 TYPE_FIELDS (record) = f_gpr;
11981 DECL_CHAIN (f_gpr) = f_fpr;
11982 DECL_CHAIN (f_fpr) = f_ovf;
11983 DECL_CHAIN (f_ovf) = f_sav;
11985 layout_type (record);
11987 /* The correct type is an array type of one element. */
11988 return build_array_type (record, build_index_type (size_zero_node));
11991 /* Implement va_start by filling the va_list structure VALIST.
11992 STDARG_P is always true, and ignored.
11993 NEXTARG points to the first anonymous stack argument.
11995 The following global variables are used to initialize
11996 the va_list structure:
11999 holds number of gprs and fprs used for named arguments.
12000 crtl->args.arg_offset_rtx:
12001 holds the offset of the first anonymous stack argument
12002 (relative to the virtual arg pointer). */
12005 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12007 HOST_WIDE_INT n_gpr, n_fpr;
12009 tree f_gpr, f_fpr, f_ovf, f_sav;
12010 tree gpr, fpr, ovf, sav, t;
12012 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12013 f_fpr = DECL_CHAIN (f_gpr);
12014 f_ovf = DECL_CHAIN (f_fpr);
12015 f_sav = DECL_CHAIN (f_ovf);
12017 valist = build_simple_mem_ref (valist);
12018 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12019 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12020 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12021 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12023 /* Count number of gp and fp argument registers used. */
12025 n_gpr = crtl->args.info.gprs;
12026 n_fpr = crtl->args.info.fprs;
12028 if (cfun->va_list_gpr_size)
12030 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12031 build_int_cst (NULL_TREE, n_gpr));
12032 TREE_SIDE_EFFECTS (t) = 1;
12033 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12036 if (cfun->va_list_fpr_size)
12038 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12039 build_int_cst (NULL_TREE, n_fpr));
12040 TREE_SIDE_EFFECTS (t) = 1;
12041 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12044 if (flag_split_stack
12045 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12047 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12052 reg = gen_reg_rtx (Pmode);
12053 cfun->machine->split_stack_varargs_pointer = reg;
12056 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12057 seq = get_insns ();
12060 push_topmost_sequence ();
12061 emit_insn_after (seq, entry_of_function ());
12062 pop_topmost_sequence ();
12065 /* Find the overflow area.
12066 FIXME: This currently is too pessimistic when the vector ABI is
12067 enabled. In that case we *always* set up the overflow area
12069 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12070 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12073 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12074 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12076 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12078 off = INTVAL (crtl->args.arg_offset_rtx);
12079 off = off < 0 ? 0 : off;
12080 if (TARGET_DEBUG_ARG)
12081 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12082 (int)n_gpr, (int)n_fpr, off);
12084 t = fold_build_pointer_plus_hwi (t, off);
12086 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12087 TREE_SIDE_EFFECTS (t) = 1;
12088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12091 /* Find the register save area. */
12092 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12093 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12095 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12096 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12098 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12099 TREE_SIDE_EFFECTS (t) = 1;
12100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
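/* A hedged example (hypothetical user code): in a 64 bit function

     void f (int a, double b, ...) { va_list ap; va_start (ap, b); }

   the code above stores __gpr = 1 and __fpr = 1 (one named argument
   consumed in each register class), points __overflow_arg_area just
   past the named stack arguments, and points __reg_save_area at the
   register save area of the current frame.  */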
12104 /* Implement va_arg by updating the va_list structure
12105 VALIST as required to retrieve an argument of type
12106 TYPE, and returning that argument.
12108 Generates code equivalent to:
12110 if (integral value) {
12111 if (size <= 4 && args.gpr < 5 ||
12112 size > 4 && args.gpr < 4)
12113 ret = args.reg_save_area[args.gpr+8]
12115 ret = *args.overflow_arg_area++;
12116 } else if (vector value) {
12117 ret = *args.overflow_arg_area;
12118 args.overflow_arg_area += size / 8;
12119 } else if (float value) {
12121 ret = args.reg_save_area[args.fpr+64]
12123 ret = *args.overflow_arg_area++;
12124 } else if (aggregate value) {
12126 ret = *args.reg_save_area[args.gpr]
12128 ret = **args.overflow_arg_area++;
12132 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12133 gimple_seq *post_p ATTRIBUTE_UNUSED)
12135 tree f_gpr, f_fpr, f_ovf, f_sav;
12136 tree gpr, fpr, ovf, sav, reg, t, u;
12137 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12138 tree lab_false, lab_over;
12139 tree addr = create_tmp_var (ptr_type_node, "addr");
12140 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12143 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12144 f_fpr = DECL_CHAIN (f_gpr);
12145 f_ovf = DECL_CHAIN (f_fpr);
12146 f_sav = DECL_CHAIN (f_ovf);
12148 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12149 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12150 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12152 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12153 both appear on a lhs. */
12154 valist = unshare_expr (valist);
12155 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12157 size = int_size_in_bytes (type);
12159 s390_check_type_for_vector_abi (type, true, false);
12161 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12163 if (TARGET_DEBUG_ARG)
12165 fprintf (stderr, "va_arg: aggregate type");
12169 /* Aggregates are passed by reference. */
12174 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12175 will be added by s390_frame_info, because for va_args an even
12176 number of GPRs (r15-r2 = 14 regs) always has to be saved. */
12177 sav_ofs = 2 * UNITS_PER_LONG;
12178 sav_scale = UNITS_PER_LONG;
12179 size = UNITS_PER_LONG;
12180 max_reg = GP_ARG_NUM_REG - n_reg;
12181 left_align_p = false;
12183 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12185 if (TARGET_DEBUG_ARG)
12187 fprintf (stderr, "va_arg: vector type");
12197 left_align_p = true;
12199 else if (s390_function_arg_float (TYPE_MODE (type), type))
12201 if (TARGET_DEBUG_ARG)
12203 fprintf (stderr, "va_arg: float type");
12207 /* FP args go in FP registers, if present. */
12211 sav_ofs = 16 * UNITS_PER_LONG;
12213 max_reg = FP_ARG_NUM_REG - n_reg;
12214 left_align_p = false;
12218 if (TARGET_DEBUG_ARG)
12220 fprintf (stderr, "va_arg: other type");
12224 /* Otherwise into GP registers. */
12227 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12229 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12230 will be added by s390_frame_info, because for va_args an even
12231 number of GPRs (r15-r2 = 14 regs) always has to be saved. */
12232 sav_ofs = 2 * UNITS_PER_LONG;
12234 if (size < UNITS_PER_LONG)
12235 sav_ofs += UNITS_PER_LONG - size;
12237 sav_scale = UNITS_PER_LONG;
12238 max_reg = GP_ARG_NUM_REG - n_reg;
12239 left_align_p = false;
12242 /* Pull the value out of the saved registers ... */
12244 if (reg != NULL_TREE)
12247 /* Pseudocode: if (reg > ((typeof (reg))max_reg)) goto lab_false;
12250 addr = sav + sav_ofs + reg * sav_scale;  goto lab_over;  lab_false: */
12257 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12258 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12260 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12261 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12262 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12263 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12264 gimplify_and_add (t, pre_p);
12266 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12267 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12268 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12269 t = fold_build_pointer_plus (t, u);
12271 gimplify_assign (addr, t, pre_p);
12273 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12275 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12278 /* ... Otherwise out of the overflow area. */
12281 if (size < UNITS_PER_LONG && !left_align_p)
12282 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12284 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12286 gimplify_assign (addr, t, pre_p);
12288 if (size < UNITS_PER_LONG && left_align_p)
12289 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12291 t = fold_build_pointer_plus_hwi (t, size);
12293 gimplify_assign (ovf, t, pre_p);
12295 if (reg != NULL_TREE)
12296 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12299 /* Increment register save count. */
12303 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12304 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12305 gimplify_and_add (u, pre_p);
12310 t = build_pointer_type_for_mode (build_pointer_type (type),
12312 addr = fold_convert (t, addr);
12313 addr = build_va_arg_indirect_ref (addr);
12317 t = build_pointer_type_for_mode (type, ptr_mode, true);
12318 addr = fold_convert (t, addr);
12321 return build_va_arg_indirect_ref (addr);
12324 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12326 DEST - Register location where CC will be stored.
12327 TDB - Pointer to a 256 byte area in which to store the transaction
12328 diagnostic block. NULL if TDB is not needed.
12329 RETRY - Retry count value. If non-NULL a retry loop for CC2
12331 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12332 of the tbegin instruction pattern. */
12335 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12337 rtx retry_plus_two = gen_reg_rtx (SImode);
12338 rtx retry_reg = gen_reg_rtx (SImode);
12339 rtx_code_label *retry_label = NULL;
12341 if (retry != NULL_RTX)
12343 emit_move_insn (retry_reg, retry);
12344 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12345 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12346 retry_label = gen_label_rtx ();
12347 emit_label (retry_label);
12350 if (clobber_fprs_p)
12353 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12356 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12360 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12363 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12364 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12366 UNSPEC_CC_TO_INT));
12367 if (retry != NULL_RTX)
12369 const int CC0 = 1 << 3;
12370 const int CC1 = 1 << 2;
12371 const int CC3 = 1 << 0;
12373 rtx count = gen_reg_rtx (SImode);
12374 rtx_code_label *leave_label = gen_label_rtx ();
12376 /* Exit for success and permanent failures. */
12377 jump = s390_emit_jump (leave_label,
12378 gen_rtx_EQ (VOIDmode,
12379 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12380 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12381 LABEL_NUSES (leave_label) = 1;
12383 /* CC2 - transient failure. Perform retry with ppa. */
12384 emit_move_insn (count, retry_plus_two);
12385 emit_insn (gen_subsi3 (count, count, retry_reg));
12386 emit_insn (gen_tx_assist (count));
12387 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12390 JUMP_LABEL (jump) = retry_label;
12391 LABEL_NUSES (retry_label) = 1;
12392 emit_label (leave_label);
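/* A hedged sketch of the retry sequence emitted above (CC2 denotes a
   transient abort):

     retry_reg = RETRY + 1;  retry_plus_two = RETRY + 2;
   retry_label:
     TBEGIN;  DEST = CC;
     if (CC is 0, 1 or 3) goto leave_label;
     count = retry_plus_two - retry_reg;
     PPA (count);                        transaction-abort assist
     if (--retry_reg != 0) goto retry_label;
   leave_label:
  */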
12397 /* Return the decl for the target specific builtin with the function
12401 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12403 if (fcode >= S390_BUILTIN_MAX)
12404 return error_mark_node;
12406 return s390_builtin_decls[fcode];
12409 /* We call mcount before the function prologue. So a profiled leaf
12410 function should stay a leaf function. */
12413 s390_keep_leaf_when_profiled ()
12418 /* Output assembly code for the trampoline template to
12421 On S/390, we use gpr 1 internally in the trampoline code;
12422 gpr 0 is used to hold the static chain. */
12425 s390_asm_trampoline_template (FILE *file)
12428 op[0] = gen_rtx_REG (Pmode, 0);
12429 op[1] = gen_rtx_REG (Pmode, 1);
12433 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12434 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12435 output_asm_insn ("br\t%1", op); /* 2 byte */
12436 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12440 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12441 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12442 output_asm_insn ("br\t%1", op); /* 2 byte */
12443 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
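/* A hedged sketch of the resulting 31 bit trampoline image (the
   64 bit variant is analogous, with lmg and 8 byte slots):

     basr %r1,0            %r1 = trampoline address + 2
     lm   %r0,%r1,6(%r1)   static chain -> %r0, target -> %r1
     br   %r1              branch to the target function
     <data>                two pointer slots, filled in by
                           s390_trampoline_init with CXT and FNADDR  */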
12447 /* Emit RTL insns to initialize the variable parts of a trampoline.
12448 FNADDR is an RTX for the address of the function's pure code.
12449 CXT is an RTX for the static chain value for the function. */
12452 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12454 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12457 emit_block_move (m_tramp, assemble_trampoline_template (),
12458 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12460 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12461 emit_move_insn (mem, cxt);
12462 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12463 emit_move_insn (mem, fnaddr);
12466 /* Output assembler code to FILE to increment profiler label # LABELNO
12467 for profiling a function entry. */
12470 s390_function_profiler (FILE *file, int labelno)
12475 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12477 fprintf (file, "# function profiler \n");
12479 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12480 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12481 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12483 op[2] = gen_rtx_REG (Pmode, 1);
12484 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12485 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12487 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12490 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12491 op[4] = gen_rtx_CONST (Pmode, op[4]);
12496 output_asm_insn ("stg\t%0,%1", op);
12497 output_asm_insn ("larl\t%2,%3", op);
12498 output_asm_insn ("brasl\t%0,%4", op);
12499 output_asm_insn ("lg\t%0,%1", op);
12501 else if (TARGET_CPU_ZARCH)
12503 output_asm_insn ("st\t%0,%1", op);
12504 output_asm_insn ("larl\t%2,%3", op);
12505 output_asm_insn ("brasl\t%0,%4", op);
12506 output_asm_insn ("l\t%0,%1", op);
12508 else if (!flag_pic)
12510 op[6] = gen_label_rtx ();
12512 output_asm_insn ("st\t%0,%1", op);
12513 output_asm_insn ("bras\t%2,%l6", op);
12514 output_asm_insn (".long\t%4", op);
12515 output_asm_insn (".long\t%3", op);
12516 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12517 output_asm_insn ("l\t%0,0(%2)", op);
12518 output_asm_insn ("l\t%2,4(%2)", op);
12519 output_asm_insn ("basr\t%0,%0", op);
12520 output_asm_insn ("l\t%0,%1", op);
12524 op[5] = gen_label_rtx ();
12525 op[6] = gen_label_rtx ();
12527 output_asm_insn ("st\t%0,%1", op);
12528 output_asm_insn ("bras\t%2,%l6", op);
12529 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12530 output_asm_insn (".long\t%4-%l5", op);
12531 output_asm_insn (".long\t%3-%l5", op);
12532 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12533 output_asm_insn ("lr\t%0,%2", op);
12534 output_asm_insn ("a\t%0,0(%2)", op);
12535 output_asm_insn ("a\t%2,4(%2)", op);
12536 output_asm_insn ("basr\t%0,%0", op);
12537 output_asm_insn ("l\t%0,%1", op);
12541 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12542 into its SYMBOL_REF_FLAGS. */
12545 s390_encode_section_info (tree decl, rtx rtl, int first)
12547 default_encode_section_info (decl, rtl, first);
12549 if (TREE_CODE (decl) == VAR_DECL)
12551 /* Store the alignment to be able to check if we can use
12552 a larl/load-relative instruction. We only handle the cases
12553 that can go wrong (i.e. no FUNC_DECLs). */
12554 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12555 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12556 else if (DECL_ALIGN (decl) % 32)
12557 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12558 else if (DECL_ALIGN (decl) % 64)
12559 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12562 /* Literal pool references don't have a decl so they are handled
12563 differently here. We rely on the information in the MEM_ALIGN
12564 entry to decide upon the alignment. */
12566 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12567 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12569 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12570 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12571 else if (MEM_ALIGN (rtl) % 32)
12572 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12573 else if (MEM_ALIGN (rtl) % 64)
12574 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
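/* A hedged example: a symbol known to be only byte-aligned gets all
   three NOTALIGN flags set above and will therefore never be
   accessed through larl or the load/store-relative instructions,
   which require at least 2 byte alignment.  */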
12578 /* Output thunk to FILE that implements a C++ virtual function call (with
12579 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12580 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12581 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12582 relative to the resulting this pointer. */
12585 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12586 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12592 /* Make sure unwind info is emitted for the thunk if needed. */
12593 final_start_function (emit_barrier (), file, 1);
12595 /* Operand 0 is the target function. */
12596 op[0] = XEXP (DECL_RTL (function), 0);
12597 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12600 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12601 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12602 op[0] = gen_rtx_CONST (Pmode, op[0]);
12605 /* Operand 1 is the 'this' pointer. */
12606 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12607 op[1] = gen_rtx_REG (Pmode, 3);
12609 op[1] = gen_rtx_REG (Pmode, 2);
12611 /* Operand 2 is the delta. */
12612 op[2] = GEN_INT (delta);
12614 /* Operand 3 is the vcall_offset. */
12615 op[3] = GEN_INT (vcall_offset);
12617 /* Operand 4 is the temporary register. */
12618 op[4] = gen_rtx_REG (Pmode, 1);
12620 /* Operands 5 to 8 can be used as labels. */
12626 /* Operand 9 can be used for temporary register. */
12629 /* Generate code. */
12632 /* Setup literal pool pointer if required. */
12633 if ((!DISP_IN_RANGE (delta)
12634 && !CONST_OK_FOR_K (delta)
12635 && !CONST_OK_FOR_Os (delta))
12636 || (!DISP_IN_RANGE (vcall_offset)
12637 && !CONST_OK_FOR_K (vcall_offset)
12638 && !CONST_OK_FOR_Os (vcall_offset)))
12640 op[5] = gen_label_rtx ();
12641 output_asm_insn ("larl\t%4,%5", op);
12644 /* Add DELTA to this pointer. */
12647 if (CONST_OK_FOR_J (delta))
12648 output_asm_insn ("la\t%1,%2(%1)", op);
12649 else if (DISP_IN_RANGE (delta))
12650 output_asm_insn ("lay\t%1,%2(%1)", op);
12651 else if (CONST_OK_FOR_K (delta))
12652 output_asm_insn ("aghi\t%1,%2", op);
12653 else if (CONST_OK_FOR_Os (delta))
12654 output_asm_insn ("agfi\t%1,%2", op);
12657 op[6] = gen_label_rtx ();
12658 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12662 /* Perform vcall adjustment. */
12665 if (DISP_IN_RANGE (vcall_offset))
12667 output_asm_insn ("lg\t%4,0(%1)", op);
12668 output_asm_insn ("ag\t%1,%3(%4)", op);
12670 else if (CONST_OK_FOR_K (vcall_offset))
12672 output_asm_insn ("lghi\t%4,%3", op);
12673 output_asm_insn ("ag\t%4,0(%1)", op);
12674 output_asm_insn ("ag\t%1,0(%4)", op);
12676 else if (CONST_OK_FOR_Os (vcall_offset))
12678 output_asm_insn ("lgfi\t%4,%3", op);
12679 output_asm_insn ("ag\t%4,0(%1)", op);
12680 output_asm_insn ("ag\t%1,0(%4)", op);
12684 op[7] = gen_label_rtx ();
12685 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12686 output_asm_insn ("ag\t%4,0(%1)", op);
12687 output_asm_insn ("ag\t%1,0(%4)", op);
12691 /* Jump to target. */
12692 output_asm_insn ("jg\t%0", op);
12694 /* Output literal pool if required. */
12697 output_asm_insn (".align\t4", op);
12698 targetm.asm_out.internal_label (file, "L",
12699 CODE_LABEL_NUMBER (op[5]));
12703 targetm.asm_out.internal_label (file, "L",
12704 CODE_LABEL_NUMBER (op[6]));
12705 output_asm_insn (".long\t%2", op);
12709 targetm.asm_out.internal_label (file, "L",
12710 CODE_LABEL_NUMBER (op[7]));
12711 output_asm_insn (".long\t%3", op);
12716 /* Setup base pointer if required. */
12718 || (!DISP_IN_RANGE (delta)
12719 && !CONST_OK_FOR_K (delta)
12720 && !CONST_OK_FOR_Os (delta))
12721 || (!DISP_IN_RANGE (delta)
12722 && !CONST_OK_FOR_K (vcall_offset)
12723 && !CONST_OK_FOR_Os (vcall_offset)))
12725 op[5] = gen_label_rtx ();
12726 output_asm_insn ("basr\t%4,0", op);
12727 targetm.asm_out.internal_label (file, "L",
12728 CODE_LABEL_NUMBER (op[5]));
12731 /* Add DELTA to this pointer. */
12734 if (CONST_OK_FOR_J (delta))
12735 output_asm_insn ("la\t%1,%2(%1)", op);
12736 else if (DISP_IN_RANGE (delta))
12737 output_asm_insn ("lay\t%1,%2(%1)", op);
12738 else if (CONST_OK_FOR_K (delta))
12739 output_asm_insn ("ahi\t%1,%2", op);
12740 else if (CONST_OK_FOR_Os (delta))
12741 output_asm_insn ("afi\t%1,%2", op);
12744 op[6] = gen_label_rtx ();
12745 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12749 /* Perform vcall adjustment. */
12752 if (CONST_OK_FOR_J (vcall_offset))
12754 output_asm_insn ("l\t%4,0(%1)", op);
12755 output_asm_insn ("a\t%1,%3(%4)", op);
12757 else if (DISP_IN_RANGE (vcall_offset))
12759 output_asm_insn ("l\t%4,0(%1)", op);
12760 output_asm_insn ("ay\t%1,%3(%4)", op);
12762 else if (CONST_OK_FOR_K (vcall_offset))
12764 output_asm_insn ("lhi\t%4,%3", op);
12765 output_asm_insn ("a\t%4,0(%1)", op);
12766 output_asm_insn ("a\t%1,0(%4)", op);
12768 else if (CONST_OK_FOR_Os (vcall_offset))
12770 output_asm_insn ("iilf\t%4,%3", op);
12771 output_asm_insn ("a\t%4,0(%1)", op);
12772 output_asm_insn ("a\t%1,0(%4)", op);
12776 op[7] = gen_label_rtx ();
12777 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12778 output_asm_insn ("a\t%4,0(%1)", op);
12779 output_asm_insn ("a\t%1,0(%4)", op);
12782 /* We had to clobber the base pointer register.
12783 Re-setup the base pointer (with a different base). */
12784 op[5] = gen_label_rtx ();
12785 output_asm_insn ("basr\t%4,0", op);
12786 targetm.asm_out.internal_label (file, "L",
12787 CODE_LABEL_NUMBER (op[5]));
12790 /* Jump to target. */
12791 op[8] = gen_label_rtx ();
12794 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12795 else if (!nonlocal)
12796 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12797 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12798 else if (flag_pic == 1)
12800 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12801 output_asm_insn ("l\t%4,%0(%4)", op);
12803 else if (flag_pic == 2)
12805 op[9] = gen_rtx_REG (Pmode, 0);
12806 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12807 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12808 output_asm_insn ("ar\t%4,%9", op);
12809 output_asm_insn ("l\t%4,0(%4)", op);
12812 output_asm_insn ("br\t%4", op);
12814 /* Output literal pool. */
12815 output_asm_insn (".align\t4", op);
12817 if (nonlocal && flag_pic == 2)
12818 output_asm_insn (".long\t%0", op);
12821 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12822 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12825 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12827 output_asm_insn (".long\t%0", op);
12829 output_asm_insn (".long\t%0-%5", op);
12833 targetm.asm_out.internal_label (file, "L",
12834 CODE_LABEL_NUMBER (op[6]));
12835 output_asm_insn (".long\t%2", op);
12839 targetm.asm_out.internal_label (file, "L",
12840 CODE_LABEL_NUMBER (op[7]));
12841 output_asm_insn (".long\t%3", op);
12844 final_end_function ();
12848 s390_valid_pointer_mode (machine_mode mode)
12850 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12853 /* Checks whether the given CALL_EXPR would use a caller
12854 saved register. This is used to decide whether sibling call
12855 optimization could be performed on the respective function
12859 s390_call_saved_register_used (tree call_expr)
12861 CUMULATIVE_ARGS cum_v;
12862 cumulative_args_t cum;
12869 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12870 cum = pack_cumulative_args (&cum_v);
12872 for (i = 0; i < call_expr_nargs (call_expr); i++)
12874 parameter = CALL_EXPR_ARG (call_expr, i);
12875 gcc_assert (parameter);
12877 /* For an undeclared variable passed as parameter we will get
12878 an ERROR_MARK node here. */
12879 if (TREE_CODE (parameter) == ERROR_MARK)
12882 type = TREE_TYPE (parameter);
12885 mode = TYPE_MODE (type);
12888 /* We assume that in the target function all parameters are
12889 named. This only has an impact on vector argument register
12890 usage none of which is call-saved. */
12891 if (pass_by_reference (&cum_v, mode, type, true))
12894 type = build_pointer_type (type);
12897 parm_rtx = s390_function_arg (cum, mode, type, true);
12899 s390_function_arg_advance (cum, mode, type, true);
12904 if (REG_P (parm_rtx))
12907 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12909 if (!call_used_regs[reg + REGNO (parm_rtx)])
12913 if (GET_CODE (parm_rtx) == PARALLEL)
12917 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12919 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12921 gcc_assert (REG_P (r));
12924 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12926 if (!call_used_regs[reg + REGNO (r)])
12935 /* Return true if the given call expression can be
12936 turned into a sibling call.
12937 DECL holds the declaration of the function to be called whereas
12938 EXP is the call expression itself. */
12941 s390_function_ok_for_sibcall (tree decl, tree exp)
12943 /* The TPF epilogue uses register 1. */
12944 if (TARGET_TPF_PROFILING)
12947 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12948 which would have to be restored before the sibcall. */
12949 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12952 /* Register 6 on s390 is available as an argument register but unfortunately
12953 "caller saved". This makes functions needing this register for arguments
12954 not suitable for sibcalls. */
12955 return !s390_call_saved_register_used (exp);
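/* A hedged example (hypothetical prototype): in

     long f (long, long, long, long, long);

   the fifth argument lands in %r6, which is call-saved, so a call to
   f fails s390_call_saved_register_used and is never turned into a
   sibcall.  */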
12958 /* Return the fixed registers used for condition codes. */
12961 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12964 *p2 = INVALID_REGNUM;
12969 /* This function is used by the call expanders of the machine description.
12970 It emits the call insn itself together with the necessary operations
12971 to adjust the target address and returns the emitted insn.
12972 ADDR_LOCATION is the target address rtx
12973 TLS_CALL the location of the thread-local symbol
12974 RESULT_REG the register where the result of the call should be stored
12975 RETADDR_REG the register where the return address should be stored
12976 If this parameter is NULL_RTX the call is considered
12977 to be a sibling call. */
12980 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12983 bool plt_call = false;
12989 /* Direct function calls need special treatment. */
12990 if (GET_CODE (addr_location) == SYMBOL_REF)
12992 /* When calling a global routine in PIC mode, we must
12993 replace the symbol itself with the PLT stub. */
12994 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12996 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12998 addr_location = gen_rtx_UNSPEC (Pmode,
12999 gen_rtvec (1, addr_location),
13001 addr_location = gen_rtx_CONST (Pmode, addr_location);
13005 /* For -fpic code the PLT entries might use r12 which is
13006 call-saved. Therefore we cannot do a sibcall when
13007 calling directly using a symbol ref. When reaching
13008 this point we decided (in s390_function_ok_for_sibcall)
13009 to do a sibcall for a function pointer but one of the
13010 optimizers was able to get rid of the function pointer
13011 by propagating the symbol ref into the call. This
13012 optimization is illegal for S/390 so we turn the direct
13013 call into an indirect call again. */
13014 addr_location = force_reg (Pmode, addr_location);
13017 /* Unless we can use the bras(l) insn, force the
13018 routine address into a register. */
13019 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13022 addr_location = legitimize_pic_address (addr_location, 0);
13024 addr_location = force_reg (Pmode, addr_location);
13028 /* If it is already an indirect call or the code above moved the
13029 SYMBOL_REF to somewhere else make sure the address can be found in
13031 if (retaddr_reg == NULL_RTX
13032 && GET_CODE (addr_location) != SYMBOL_REF
13035 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13036 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13039 addr_location = gen_rtx_MEM (QImode, addr_location);
13040 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13042 if (result_reg != NULL_RTX)
13043 call = gen_rtx_SET (result_reg, call);
13045 if (retaddr_reg != NULL_RTX)
13047 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13049 if (tls_call != NULL_RTX)
13050 vec = gen_rtvec (3, call, clobber,
13051 gen_rtx_USE (VOIDmode, tls_call));
13053 vec = gen_rtvec (2, call, clobber);
13055 call = gen_rtx_PARALLEL (VOIDmode, vec);
13058 insn = emit_call_insn (call);
13060 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13061 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13063 /* s390_function_ok_for_sibcall should
13064 have denied sibcalls in this case. */
13065 gcc_assert (retaddr_reg != NULL_RTX);
13066 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13071 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13074 s390_conditional_register_usage (void)
13080 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13081 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13083 if (TARGET_CPU_ZARCH)
13085 fixed_regs[BASE_REGNUM] = 0;
13086 call_used_regs[BASE_REGNUM] = 0;
13087 fixed_regs[RETURN_REGNUM] = 0;
13088 call_used_regs[RETURN_REGNUM] = 0;
13092 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13093 call_used_regs[i] = call_really_used_regs[i] = 0;
13097 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13098 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13101 if (TARGET_SOFT_FLOAT)
13103 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13104 call_used_regs[i] = fixed_regs[i] = 1;
13107 /* Disable v16 - v31 for non-vector target. */
13110 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13111 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13115 /* Corresponding function to eh_return expander. */
13117 static GTY(()) rtx s390_tpf_eh_return_symbol;
13119 s390_emit_tpf_eh_return (rtx target)
13124 if (!s390_tpf_eh_return_symbol)
13125 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13127 reg = gen_rtx_REG (Pmode, 2);
13128 orig_ra = gen_rtx_REG (Pmode, 3);
13130 emit_move_insn (reg, target);
13131 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13132 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13133 gen_rtx_REG (Pmode, RETURN_REGNUM));
13134 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13135 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13137 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13140 /* Rework the prologue/epilogue to avoid saving/restoring
13141 registers unnecessarily. */
13144 s390_optimize_prologue (void)
13146 rtx_insn *insn, *new_insn, *next_insn;
13148 /* Do a final recompute of the frame-related data. */
13149 s390_optimize_register_info ();
13151 /* If all special registers are in fact used, there's nothing we
13152 can do, so no point in walking the insn list. */
13154 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13155 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13156 && (TARGET_CPU_ZARCH
13157 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13158 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13161 /* Search for prologue/epilogue insns and replace them. */
13163 for (insn = get_insns (); insn; insn = next_insn)
13165 int first, last, off;
13166 rtx set, base, offset;
13169 next_insn = NEXT_INSN (insn);
13171 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13174 pat = PATTERN (insn);
13176 /* Remove ldgr/lgdr instructions used for saving and restore
13177 GPRs if possible. */
13182 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13183 tmp_pat = XVECEXP (pat, 0, 0);
13185 if (GET_CODE (tmp_pat) == SET
13186 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13187 && REG_P (SET_SRC (tmp_pat))
13188 && REG_P (SET_DEST (tmp_pat)))
13190 int src_regno = REGNO (SET_SRC (tmp_pat));
13191 int dest_regno = REGNO (SET_DEST (tmp_pat));
13195 if (!((GENERAL_REGNO_P (src_regno)
13196 && FP_REGNO_P (dest_regno))
13197 || (FP_REGNO_P (src_regno)
13198 && GENERAL_REGNO_P (dest_regno))))
13201 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13202 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13204 /* GPR must be call-saved, FPR must be call-clobbered. */
13205 if (!call_really_used_regs[fpr_regno]
13206 || call_really_used_regs[gpr_regno])
13209 /* It must not happen that what we once saved in an FPR now
13210 needs a stack slot. */
13211 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13213 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13215 remove_insn (insn);
13221 if (GET_CODE (pat) == PARALLEL
13222 && store_multiple_operation (pat, VOIDmode))
13224 set = XVECEXP (pat, 0, 0);
13225 first = REGNO (SET_SRC (set));
13226 last = first + XVECLEN (pat, 0) - 1;
13227 offset = const0_rtx;
13228 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13229 off = INTVAL (offset);
13231 if (GET_CODE (base) != REG || off < 0)
13233 if (cfun_frame_layout.first_save_gpr != -1
13234 && (cfun_frame_layout.first_save_gpr < first
13235 || cfun_frame_layout.last_save_gpr > last))
13237 if (REGNO (base) != STACK_POINTER_REGNUM
13238 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13240 if (first > BASE_REGNUM || last < BASE_REGNUM)
13243 if (cfun_frame_layout.first_save_gpr != -1)
13245 rtx s_pat = save_gprs (base,
13246 off + (cfun_frame_layout.first_save_gpr
13247 - first) * UNITS_PER_LONG,
13248 cfun_frame_layout.first_save_gpr,
13249 cfun_frame_layout.last_save_gpr);
13250 new_insn = emit_insn_before (s_pat, insn);
13251 INSN_ADDRESSES_NEW (new_insn, -1);
13254 remove_insn (insn);
13258 if (cfun_frame_layout.first_save_gpr == -1
13259 && GET_CODE (pat) == SET
13260 && GENERAL_REG_P (SET_SRC (pat))
13261 && GET_CODE (SET_DEST (pat)) == MEM)
13264 first = REGNO (SET_SRC (set));
13265 offset = const0_rtx;
13266 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13267 off = INTVAL (offset);
13269 if (GET_CODE (base) != REG || off < 0)
13271 if (REGNO (base) != STACK_POINTER_REGNUM
13272 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13275 remove_insn (insn);
13279 if (GET_CODE (pat) == PARALLEL
13280 && load_multiple_operation (pat, VOIDmode))
13282 set = XVECEXP (pat, 0, 0);
13283 first = REGNO (SET_DEST (set));
13284 last = first + XVECLEN (pat, 0) - 1;
13285 offset = const0_rtx;
13286 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13287 off = INTVAL (offset);
13289 if (GET_CODE (base) != REG || off < 0)
13292 if (cfun_frame_layout.first_restore_gpr != -1
13293 && (cfun_frame_layout.first_restore_gpr < first
13294 || cfun_frame_layout.last_restore_gpr > last))
13296 if (REGNO (base) != STACK_POINTER_REGNUM
13297 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13299 if (first > BASE_REGNUM || last < BASE_REGNUM)
13302 if (cfun_frame_layout.first_restore_gpr != -1)
13304 rtx rpat = restore_gprs (base,
13305 off + (cfun_frame_layout.first_restore_gpr
13306 - first) * UNITS_PER_LONG,
13307 cfun_frame_layout.first_restore_gpr,
13308 cfun_frame_layout.last_restore_gpr);
13310 /* Remove REG_CFA_RESTOREs for registers that we no
13311 longer need to save. */
13312 REG_NOTES (rpat) = REG_NOTES (insn);
13313 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13314 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13315 && ((int) REGNO (XEXP (*ptr, 0))
13316 < cfun_frame_layout.first_restore_gpr))
13317 *ptr = XEXP (*ptr, 1);
13319 ptr = &XEXP (*ptr, 1);
13320 new_insn = emit_insn_before (rpat, insn);
13321 RTX_FRAME_RELATED_P (new_insn) = 1;
13322 INSN_ADDRESSES_NEW (new_insn, -1);
13325 remove_insn (insn);
13329 if (cfun_frame_layout.first_restore_gpr == -1
13330 && GET_CODE (pat) == SET
13331 && GENERAL_REG_P (SET_DEST (pat))
13332 && GET_CODE (SET_SRC (pat)) == MEM)
13335 first = REGNO (SET_DEST (set));
13336 offset = const0_rtx;
13337 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13338 off = INTVAL (offset);
13340 if (GET_CODE (base) != REG || off < 0)
13343 if (REGNO (base) != STACK_POINTER_REGNUM
13344 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13347 remove_insn (insn);
13353 /* On z10 and later the dynamic branch prediction must see the
13354 backward jump within a certain window. If not it falls back to
13355 the static prediction. This function rearranges the loop backward
13356 branch in a way which makes the static prediction always correct.
13357 The function returns true if it added an instruction. */
13359 s390_fix_long_loop_prediction (rtx_insn *insn)
13361 rtx set = single_set (insn);
13362 rtx code_label, label_ref;
13363 rtx_insn *uncond_jump;
13364 rtx_insn *cur_insn;
13368 /* This will exclude branch on count and branch on index patterns
13369 since these are correctly statically predicted. */
13371 || SET_DEST (set) != pc_rtx
13372 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13375 /* Skip conditional returns. */
13376 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13377 && XEXP (SET_SRC (set), 2) == pc_rtx)
13380 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13381 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13383 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13385 code_label = XEXP (label_ref, 0);
13387 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13388 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13389 || (INSN_ADDRESSES (INSN_UID (insn))
13390 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13393 for (distance = 0, cur_insn = PREV_INSN (insn);
13394 distance < PREDICT_DISTANCE - 6;
13395 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13396 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13399 rtx_code_label *new_label = gen_label_rtx ();
13400 uncond_jump = emit_jump_insn_after (
13401 gen_rtx_SET (pc_rtx,
13402 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13404 emit_label_after (new_label, uncond_jump);
13406 tmp = XEXP (SET_SRC (set), 1);
13407 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13408 XEXP (SET_SRC (set), 2) = tmp;
13409 INSN_CODE (insn) = -1;
13411 XEXP (label_ref, 0) = new_label;
13412 JUMP_LABEL (insn) = new_label;
13413 JUMP_LABEL (uncond_jump) = code_label;
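/* A hedged sketch of the rewrite performed above: a far backward
   conditional jump

     jne .Ltop                  too far away for dynamic prediction

   becomes a short forward branch around an unconditional backward
   jump, which the static predictor handles correctly:

     je .Lskip
     j  .Ltop
   .Lskip:
  */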
13418 /* Returns 1 if INSN reads the value of REG for purposes not related
13419 to addressing of memory, and 0 otherwise. */
13421 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13423 return reg_referenced_p (reg, PATTERN (insn))
13424 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13427 /* Starting from INSN find_cond_jump looks downwards in the insn
13428 stream for a single jump insn which is the last user of the
13429 condition code set in INSN. */
13431 find_cond_jump (rtx_insn *insn)
13433 for (; insn; insn = NEXT_INSN (insn))
13437 if (LABEL_P (insn))
13440 if (!JUMP_P (insn))
13442 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13447 /* This will be triggered by a return. */
13448 if (GET_CODE (PATTERN (insn)) != SET)
13451 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13452 ite = SET_SRC (PATTERN (insn));
13454 if (GET_CODE (ite) != IF_THEN_ELSE)
13457 cc = XEXP (XEXP (ite, 0), 0);
13458 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13461 if (find_reg_note (insn, REG_DEAD, cc))
13469 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13470 the semantics does not change. If NULL_RTX is passed as COND the
13471 function tries to find the conditional jump starting with INSN. */
13473 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13477 if (cond == NULL_RTX)
13479 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13480 rtx set = jump ? single_set (jump) : NULL_RTX;
13482 if (set == NULL_RTX)
13485 cond = XEXP (SET_SRC (set), 0);
13490 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13493 /* On z10, instructions of the compare-and-branch family have the
13494 property of accessing the register occurring as second operand with
13495 its bits complemented. If such a compare is grouped with a second
13496 instruction that accesses the same register non-complemented, and
13497 if that register's value is delivered via a bypass, then the
13498 pipeline recycles, thereby causing significant performance decline.
13499 This function locates such situations and exchanges the two
13500 operands of the compare. The function returns true whenever it
13503 s390_z10_optimize_cmp (rtx_insn *insn)
13505 rtx_insn *prev_insn, *next_insn;
13506 bool insn_added_p = false;
13507 rtx cond, *op0, *op1;
13509 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13511 /* Handle compare and branch and branch on count
13513 rtx pattern = single_set (insn);
13516 || SET_DEST (pattern) != pc_rtx
13517 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13520 cond = XEXP (SET_SRC (pattern), 0);
13521 op0 = &XEXP (cond, 0);
13522 op1 = &XEXP (cond, 1);
13524 else if (GET_CODE (PATTERN (insn)) == SET)
13528 /* Handle normal compare instructions. */
13529 src = SET_SRC (PATTERN (insn));
13530 dest = SET_DEST (PATTERN (insn));
13533 || !CC_REGNO_P (REGNO (dest))
13534 || GET_CODE (src) != COMPARE)
13537 /* s390_swap_cmp will try to find the conditional
13538 jump when passing NULL_RTX as condition. */
13540 op0 = &XEXP (src, 0);
13541 op1 = &XEXP (src, 1);
13546 if (!REG_P (*op0) || !REG_P (*op1))
13549 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13552 /* Swap the COMPARE arguments and its mask if there is a
13553 conflicting access in the previous insn. */
13554 prev_insn = prev_active_insn (insn);
13555 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13556 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13557 s390_swap_cmp (cond, op0, op1, insn);
13559 /* Check if there is a conflict with the next insn. If there
13560 was no conflict with the previous insn, then swap the
13561 COMPARE arguments and its mask. If we already swapped
13562 the operands, or if swapping them would cause a conflict
13563 with the previous insn, issue a NOP after the COMPARE in
13564 order to separate the two instructions. */
13565 next_insn = next_active_insn (insn);
13566 if (next_insn != NULL_RTX && INSN_P (next_insn)
13567 && s390_non_addr_reg_read_p (*op1, next_insn))
13569 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13570 && s390_non_addr_reg_read_p (*op0, prev_insn))
13572 if (REGNO (*op1) == 0)
13573 emit_insn_after (gen_nop1 (), insn);
13575 emit_insn_after (gen_nop (), insn);
13576 insn_added_p = true;
13579 s390_swap_cmp (cond, op0, op1, insn);
13581 return insn_added_p;
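/* A hedged example of the hazard handled above (z10): in

     cgrj %r1,%r2,...           %r2 is read with complemented bits
     agr  %r3,%r2               %r2 is read normally

   grouping both instructions makes the pipeline recycle.  The code
   either swaps %r1 and %r2 in the compare (adjusting the condition)
   or inserts a nop between the two instructions.  */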
13584 /* Number of INSNs to be scanned backward in the last BB of the loop
13585 and forward in the first BB of the loop. This usually should be a
13586 bit more than the number of INSNs which could go into one
13588 #define S390_OSC_SCAN_INSN_NUM 5
13590 /* Scan LOOP for static OSC collisions and return true if an osc_break
13591 should be issued for this loop. */
13593 s390_adjust_loop_scan_osc (struct loop* loop)
13596 HARD_REG_SET modregs, newregs;
13597 rtx_insn *insn, *store_insn = NULL;
13599 struct s390_address addr_store, addr_load;
13600 subrtx_iterator::array_type array;
13603 CLEAR_HARD_REG_SET (modregs);
13606 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13608 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13612 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13615 find_all_hard_reg_sets (insn, &newregs, true);
13616 IOR_HARD_REG_SET (modregs, newregs);
13618 set = single_set (insn);
13622 if (MEM_P (SET_DEST (set))
13623 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13630 if (store_insn == NULL_RTX)
13634 FOR_BB_INSNS (loop->header, insn)
13636 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13639 if (insn == store_insn)
13643 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13646 find_all_hard_reg_sets (insn, &newregs, true);
13647 IOR_HARD_REG_SET (modregs, newregs);
13649 set = single_set (insn);
13653 /* An intermediate store disrupts static OSC checking anyway. */
13655 if (MEM_P (SET_DEST (set))
13656 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13659 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13661 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13662 && rtx_equal_p (addr_load.base, addr_store.base)
13663 && rtx_equal_p (addr_load.indx, addr_store.indx)
13664 && rtx_equal_p (addr_load.disp, addr_store.disp))
13666 if ((addr_load.base != NULL_RTX
13667 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
13668 || (addr_load.indx != NULL_RTX
13669 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
13676 /* Look for adjustments which can be done on simple innermost loops. */
13679 s390_adjust_loops ()
13681 struct loop *loop = NULL;
13684 compute_bb_for_insn ();
13686 /* Find the loops. */
13687 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
13689 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
13693 flow_loop_dump (loop, dump_file, NULL, 0);
13694 fprintf (dump_file, ";; OSC loop scan Loop: ");
13696 if (loop->latch == NULL
13697 || pc_set (BB_END (loop->latch)) == NULL_RTX
13698 || !s390_adjust_loop_scan_osc (loop))
13702 if (loop->latch == NULL)
13703 fprintf (dump_file, " multiple backward jumps\n");
13706 fprintf (dump_file, " header insn: %d latch insn: %d ",
13707 INSN_UID (BB_HEAD (loop->header)),
13708 INSN_UID (BB_END (loop->latch)));
13709 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
13710 fprintf (dump_file, " loop does not end with jump\n");
13712 fprintf (dump_file, " not instrumented\n");
13718 rtx_insn *new_insn;
13721 fprintf (dump_file, " adding OSC break insn: ");
13722 new_insn = emit_insn_before (gen_osc_break (),
13723 BB_END (loop->latch));
13724 INSN_ADDRESSES_NEW (new_insn, -1);
13728 loop_optimizer_finalize ();
13730 df_finish_pass (false);
13733 /* Perform machine-dependent processing. */
13738 bool pool_overflow = false;
13739 int hw_before, hw_after;
13741 if (s390_tune == PROCESSOR_2964_Z13)
13742 s390_adjust_loops ();
13744 /* Make sure all splits have been performed; splits after
13745 machine_dependent_reorg might confuse insn length counts. */
13746 split_all_insns_noflow ();
13748 /* Install the main literal pool and the associated base
13749 register load insns.
13751 In addition, there are two problematic situations we need to correct:
13754 - the literal pool might be > 4096 bytes in size, so that
13755 some of its elements cannot be directly accessed
13757 - a branch target might be > 64K away from the branch, so that
13758 it is not possible to use a PC-relative instruction.
13760 To fix those, we split the single literal pool into multiple
13761 pool chunks, reloading the pool base register at various
13762 points throughout the function to ensure it always points to
13763 the pool chunk the following code expects, and / or replace
13764 PC-relative branches by absolute branches.
13766 However, the two problems are interdependent: splitting the
13767 literal pool can move a branch further away from its target,
13768 causing the 64K limit to overflow, and on the other hand,
13769 replacing a PC-relative branch by an absolute branch means
13770 we need to put the branch target address into the literal
13771 pool, possibly causing it to overflow.
13773 So, we loop trying to fix up both problems until we manage
13774 to satisfy both conditions at the same time. Note that the
13775 loop is guaranteed to terminate as every pass of the loop
13776 strictly decreases the total number of PC-relative branches
13777 in the function. (This is not completely true as there
13778 might be branch-over-pool insns introduced by chunkify_start.
13779 Those never need to be split, however.) */
13783 struct constant_pool *pool = NULL;
13785 /* Collect the literal pool. */
13786 if (!pool_overflow)
13788 pool = s390_mainpool_start ();
13790 pool_overflow = true;
13793 /* If literal pool overflowed, start to chunkify it. */
13795 pool = s390_chunkify_start ();
13797 /* Split out-of-range branches. If this has created new
13798 literal pool entries, cancel current chunk list and
13799 recompute it. zSeries machines have large branch
13800 instructions, so we never need to split a branch. */
13801 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13804 s390_chunkify_cancel (pool);
13806 s390_mainpool_cancel (pool);
13811 /* If we made it up to here, both conditions are satisfied.
13812 Finish up literal pool related changes. */
13814 s390_chunkify_finish (pool);
13816 s390_mainpool_finish (pool);
13818 /* We're done splitting branches. */
13819 cfun->machine->split_branches_pending_p = false;
13823 /* Generate out-of-pool execute target insns. */
13824 if (TARGET_CPU_ZARCH)
13826 rtx_insn *insn, *target;
13829 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13831 label = s390_execute_label (insn);
13835 gcc_assert (label != const0_rtx);
13837 target = emit_label (XEXP (label, 0));
13838 INSN_ADDRESSES_NEW (target, -1);
13840 target = emit_insn (s390_execute_target (insn));
13841 INSN_ADDRESSES_NEW (target, -1);
13845 /* Try to optimize prologue and epilogue further. */
13846 s390_optimize_prologue ();
13848 /* Walk over the insns and do some >=z10 specific changes. */
13849 if (s390_tune >= PROCESSOR_2097_Z10)
13852 bool insn_added_p = false;
13854 /* The insn lengths and addresses have to be up to date for the
13855 following manipulations. */
13856 shorten_branches (get_insns ());
13858 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13860 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13864 insn_added_p |= s390_fix_long_loop_prediction (insn);
13866 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13867 || GET_CODE (PATTERN (insn)) == SET)
13868 && s390_tune == PROCESSOR_2097_Z10)
13869 insn_added_p |= s390_z10_optimize_cmp (insn);
13872 /* Adjust branches if we added new instructions. */
13874 shorten_branches (get_insns ());
13877 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13882 /* Insert NOPs for hotpatching. */
13883 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13885 1. inside the area covered by debug information to allow setting
13886 breakpoints at the NOPs,
13887 2. before any insn which results in an asm instruction,
13888 3. before in-function labels to avoid jumping to the NOPs, for
13889 example as part of a loop,
13890 4. before any barrier in case the function is completely empty
13891 (__builtin_unreachable ()) and has neither internal labels nor active insns. */
13894 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13896 /* Output a series of NOPs before the first active insn. */
13897 while (insn && hw_after > 0)
13899 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13901 emit_insn_before (gen_nop_6_byte (), insn);
13904 else if (hw_after >= 2)
13906 emit_insn_before (gen_nop_4_byte (), insn);
13911 emit_insn_before (gen_nop_2_byte (), insn);
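/* A worked example (assuming the usual halfword bookkeeping for
   hw_after): hw_after == 5 on a zarch target is consumed as one 6-byte
   NOP (3 halfwords) followed by one 4-byte NOP (2 halfwords).  */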
13918 /* Return true if INSN is a fp load insn writing register REGNO. */
13920 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13923 enum attr_type flag = s390_safe_attr_type (insn);
13925 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13928 set = single_set (insn);
13930 if (set == NULL_RTX)
13933 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13936 if (REGNO (SET_DEST (set)) != regno)
13942 /* This value describes the distance to be avoided between an
13943 arithmetic fp instruction and an fp load writing the same register.
13944 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
13945 fine but the exact value has to be avoided. Otherwise the FP
13946 pipeline will throw an exception causing a major penalty. */
13947 #define Z10_EARLYLOAD_DISTANCE 7
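/* For instance, if an arithmetic FP instruction writing %f2 is followed
   exactly Z10_EARLYLOAD_DISTANCE insns later by an FP load of %f2, the
   penalty strikes; distances of 6 or 8 insns are fine.  (%f2 is just an
   example register.)  */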
13949 /* Rearrange the ready list in order to avoid the situation described
13950 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13951 moved to the very end of the ready list. */
13953 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13955 unsigned int regno;
13956 int nready = *nready_p;
13961 enum attr_type flag;
13964 /* Skip DISTANCE - 1 active insns. */
13965 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13966 distance > 0 && insn != NULL_RTX;
13967 distance--, insn = prev_active_insn (insn))
13968 if (CALL_P (insn) || JUMP_P (insn))
13971 if (insn == NULL_RTX)
13974 set = single_set (insn);
13976 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13977 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13980 flag = s390_safe_attr_type (insn);
13982 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13985 regno = REGNO (SET_DEST (set));
13988 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13995 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14000 /* The s390_sched_state variable tracks the state of the current or
14001 the last instruction group.
14003 0,1,2 number of instructions scheduled in the current group
14004 3 the last group is complete - normal insns
14005 4 the last group was a cracked/expanded insn */
14007 static int s390_sched_state;
14009 #define S390_SCHED_STATE_NORMAL 3
14010 #define S390_SCHED_STATE_CRACKED 4
14012 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14013 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14014 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14015 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
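/* Illustrative transitions, following s390_sched_variable_issue below:
   a normal insn advances the state 0 -> 1 -> 2 -> S390_SCHED_STATE_NORMAL,
   a normal insn issued after a complete group starts a new group
   (S390_SCHED_STATE_NORMAL -> 1), and a cracked or expanded insn always
   moves to S390_SCHED_STATE_CRACKED.  */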
14017 static unsigned int
14018 s390_get_sched_attrmask (rtx_insn *insn)
14020 unsigned int mask = 0;
14024 case PROCESSOR_2827_ZEC12:
14025 if (get_attr_zEC12_cracked (insn))
14026 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14027 if (get_attr_zEC12_expanded (insn))
14028 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14029 if (get_attr_zEC12_endgroup (insn))
14030 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14031 if (get_attr_zEC12_groupalone (insn))
14032 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14034 case PROCESSOR_2964_Z13:
14035 if (get_attr_z13_cracked (insn))
14036 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14037 if (get_attr_z13_expanded (insn))
14038 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14039 if (get_attr_z13_endgroup (insn))
14040 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14041 if (get_attr_z13_groupalone (insn))
14042 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14045 gcc_unreachable ();
14050 static unsigned int
14051 s390_get_unit_mask (rtx_insn *insn, int *units)
14053 unsigned int mask = 0;
14057 case PROCESSOR_2964_Z13:
14059 if (get_attr_z13_unit_lsu (insn))
14061 if (get_attr_z13_unit_fxu (insn))
14063 if (get_attr_z13_unit_vfu (insn))
14067 gcc_unreachable ();
14072 /* Return the scheduling score for INSN. The higher the score the
14073 better. The score is calculated from the OOO scheduling attributes
14074 of INSN and the scheduling state s390_sched_state. */
14076 s390_sched_score (rtx_insn *insn)
14078 unsigned int mask = s390_get_sched_attrmask (insn);
14081 switch (s390_sched_state)
14084 /* Try to put insns into the first slot which would otherwise break a group. */
14086 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14087 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14089 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14093 /* Prefer not cracked insns while trying to put together a group. */
14095 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14096 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14097 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14099 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14103 /* Prefer not cracked insns while trying to put together a group. */
14105 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14106 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14107 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14109 /* Prefer endgroup insns in the last slot. */
14110 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14113 case S390_SCHED_STATE_NORMAL:
14114 /* Prefer not cracked insns if the last was not cracked. */
14115 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14116 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14118 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14121 case S390_SCHED_STATE_CRACKED:
14122 /* Try to keep cracked insns together to prevent them from
14123 interrupting groups. */
14124 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14125 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14130 if (s390_tune == PROCESSOR_2964_Z13)
14133 unsigned unit_mask, m = 1;
14135 unit_mask = s390_get_unit_mask (insn, &units);
14136 gcc_assert (units <= MAX_SCHED_UNITS);
14138 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14139 ago the last insn of this unit type got scheduled. This is
14140 supposed to help provide a proper instruction mix to the hardware. */
14142 for (i = 0; i < units; i++, m <<= 1)
14144 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14145 MAX_SCHED_MIX_DISTANCE);
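/* A sketch with made-up numbers: assuming MAX_SCHED_MIX_SCORE == 8 and
   MAX_SCHED_MIX_DISTANCE == 3 (the real values are defined elsewhere in
   this file), a unit whose last use was 2 insns ago contributes
   2 * 8 / 3 == 5 points to the score.  */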
14150 /* This function is called via hook TARGET_SCHED_REORDER before
14151 issuing one insn from list READY which contains *NREADYP entries.
14152 For target z10 it reorders load instructions to avoid early load
14153 conflicts in the floating point pipeline. */
14155 s390_sched_reorder (FILE *file, int verbose,
14156 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14158 if (s390_tune == PROCESSOR_2097_Z10
14159 && reload_completed
14161 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14163 if (s390_tune >= PROCESSOR_2827_ZEC12
14164 && reload_completed
14168 int last_index = *nreadyp - 1;
14169 int max_index = -1;
14170 int max_score = -1;
14173 /* Just move the insn with the highest score to the top (the
14174 end) of the list. A full sort is not needed since a conflict
14175 in the hazard recognition cannot happen. So the top insn in
14176 the ready list will always be taken. */
14177 for (i = last_index; i >= 0; i--)
14181 if (recog_memoized (ready[i]) < 0)
14184 score = s390_sched_score (ready[i]);
14185 if (score > max_score)
14192 if (max_index != -1)
14194 if (max_index != last_index)
14196 tmp = ready[max_index];
14197 ready[max_index] = ready[last_index];
14198 ready[last_index] = tmp;
14202 ";;\t\tBACKEND: move insn %d to the top of list\n",
14203 INSN_UID (ready[last_index]));
14205 else if (verbose > 5)
14207 ";;\t\tBACKEND: best insn %d already on top\n",
14208 INSN_UID (ready[last_index]));
14213 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14216 for (i = last_index; i >= 0; i--)
14218 unsigned int sched_mask;
14219 rtx_insn *insn = ready[i];
14221 if (recog_memoized (insn) < 0)
14224 sched_mask = s390_get_sched_attrmask (insn);
14225 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14227 s390_sched_score (insn));
14228 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14229 ((M) & sched_mask) ? #ATTR : "");
14230 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14231 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14232 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14233 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14234 #undef PRINT_SCHED_ATTR
14235 if (s390_tune == PROCESSOR_2964_Z13)
14237 unsigned int unit_mask, m = 1;
14240 unit_mask = s390_get_unit_mask (insn, &units);
14241 fprintf (file, "(units:");
14242 for (j = 0; j < units; j++, m <<= 1)
14244 fprintf (file, " u%d", j);
14245 fprintf (file, ")");
14247 fprintf (file, "\n");
14252 return s390_issue_rate ();
14256 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14257 the scheduler has issued INSN. It stores the last issued insn into
14258 last_scheduled_insn in order to make it available for
14259 s390_sched_reorder. */
14261 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14263 last_scheduled_insn = insn;
14265 if (s390_tune >= PROCESSOR_2827_ZEC12
14266 && reload_completed
14267 && recog_memoized (insn) >= 0)
14269 unsigned int mask = s390_get_sched_attrmask (insn);
14271 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14272 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14273 s390_sched_state = S390_SCHED_STATE_CRACKED;
14274 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14275 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14276 s390_sched_state = S390_SCHED_STATE_NORMAL;
14279 /* Only normal insns are left (mask == 0). */
14280 switch (s390_sched_state)
14285 case S390_SCHED_STATE_NORMAL:
14286 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14287 s390_sched_state = 1;
14289 s390_sched_state++;
14292 case S390_SCHED_STATE_CRACKED:
14293 s390_sched_state = S390_SCHED_STATE_NORMAL;
14298 if (s390_tune == PROCESSOR_2964_Z13)
14301 unsigned unit_mask, m = 1;
14303 unit_mask = s390_get_unit_mask (insn, &units);
14304 gcc_assert (units <= MAX_SCHED_UNITS);
14306 for (i = 0; i < units; i++, m <<= 1)
14308 last_scheduled_unit_distance[i] = 0;
14309 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14310 last_scheduled_unit_distance[i]++;
14315 unsigned int sched_mask;
14317 sched_mask = s390_get_sched_attrmask (insn);
14319 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14320 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14321 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14322 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14323 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14324 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14325 #undef PRINT_SCHED_ATTR
14327 if (s390_tune == PROCESSOR_2964_Z13)
14329 unsigned int unit_mask, m = 1;
14332 unit_mask = s390_get_unit_mask (insn, &units);
14333 fprintf (file, "(units:");
14334 for (j = 0; j < units; j++, m <<= 1)
14336 fprintf (file, " %d", j);
14337 fprintf (file, ")");
14339 fprintf (file, " sched state: %d\n", s390_sched_state);
14341 if (s390_tune == PROCESSOR_2964_Z13)
14345 s390_get_unit_mask (insn, &units);
14347 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14348 for (j = 0; j < units; j++)
14349 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14350 fprintf (file, "\n");
14355 if (GET_CODE (PATTERN (insn)) != USE
14356 && GET_CODE (PATTERN (insn)) != CLOBBER)
14363 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14364 int verbose ATTRIBUTE_UNUSED,
14365 int max_ready ATTRIBUTE_UNUSED)
14367 last_scheduled_insn = NULL;
14368 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14369 s390_sched_state = 0;
14372 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14373 how often LOOP should be unrolled if tuned for cpus with
14374 a built-in stride prefetcher.
14375 The loop is analyzed for memory accesses by iterating over all
14376 rtxs of the loop body. Depending on the loop_depth and the amount of
14377 memory accesses a new number <= nunroll is returned to improve the
14378 behavior of the hardware prefetch unit. */
14380 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14385 unsigned mem_count = 0;
14387 if (s390_tune < PROCESSOR_2097_Z10)
14390 /* Count the number of memory references within the loop body. */
14391 bbs = get_loop_body (loop);
14392 subrtx_iterator::array_type array;
14393 for (i = 0; i < loop->num_nodes; i++)
14394 FOR_BB_INSNS (bbs[i], insn)
14395 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14396 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14401 /* Prevent division by zero; nunroll needs no adjustment in this case anyway. */
14402 if (mem_count == 0)
14405 switch (loop_depth(loop))
14408 return MIN (nunroll, 28 / mem_count);
14410 return MIN (nunroll, 22 / mem_count);
14412 return MIN (nunroll, 16 / mem_count);
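/* Worked example (assuming the first case above corresponds to loop
   depth 1): an innermost loop at depth 1 containing four memory
   references gets MIN (nunroll, 28 / 4), i.e. at most 7 unroll copies.  */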
14416 /* Restore the current options. This is a hook function and also called internally. */
14420 s390_function_specific_restore (struct gcc_options *opts,
14421 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14423 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14427 s390_option_override_internal (bool main_args_p,
14428 struct gcc_options *opts,
14429 const struct gcc_options *opts_set)
14431 const char *prefix;
14432 const char *suffix;
14434 /* Set up prefix/suffix so the error messages refer to either the command
14435 line argument or the attribute(target). */
14443 prefix = "option(\"";
14448 /* Architecture mode defaults according to ABI. */
14449 if (!(opts_set->x_target_flags & MASK_ZARCH))
14452 opts->x_target_flags |= MASK_ZARCH;
14454 opts->x_target_flags &= ~MASK_ZARCH;
14457 /* Set the march default in case it hasn't been specified on cmdline. */
14458 if (!opts_set->x_s390_arch)
14459 opts->x_s390_arch = PROCESSOR_2064_Z900;
14460 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14461 || opts->x_s390_arch == PROCESSOR_9672_G6)
14462 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14463 "in future releases; use at least %sarch=z900%s",
14464 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14465 suffix, prefix, suffix);
14467 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14469 /* Determine processor to tune for. */
14470 if (!opts_set->x_s390_tune)
14471 opts->x_s390_tune = opts->x_s390_arch;
14472 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14473 || opts->x_s390_tune == PROCESSOR_9672_G6)
14474 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14475 "in future releases; use at least %stune=z900%s",
14476 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14477 suffix, prefix, suffix);
14479 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14481 /* Sanity checks. */
14482 if (opts->x_s390_arch == PROCESSOR_NATIVE
14483 || opts->x_s390_tune == PROCESSOR_NATIVE)
14484 gcc_unreachable ();
14485 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14486 error ("z/Architecture mode not supported on %s",
14487 processor_table[(int)opts->x_s390_arch].name);
14488 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14489 error ("64-bit ABI not supported in ESA/390 mode");
14491 /* Enable hardware transactions if available and not explicitly
14492 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14493 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14495 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14496 opts->x_target_flags |= MASK_OPT_HTM;
14498 opts->x_target_flags &= ~MASK_OPT_HTM;
14501 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14503 if (TARGET_OPT_VX_P (opts->x_target_flags))
14505 if (!TARGET_CPU_VX_P (opts))
14506 error ("hardware vector support not available on %s",
14507 processor_table[(int)opts->x_s390_arch].name);
14508 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14509 error ("hardware vector support not available with -msoft-float");
14514 if (TARGET_CPU_VX_P (opts))
14515 /* Enable vector support if available and not explicitly disabled
14516 by user. E.g. with -m31 -march=z13 -mzarch */
14517 opts->x_target_flags |= MASK_OPT_VX;
14519 opts->x_target_flags &= ~MASK_OPT_VX;
14522 /* Use hardware DFP if available and not explicitly disabled by
14523 user. E.g. with -m31 -march=z10 -mzarch */
14524 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14526 if (TARGET_DFP_P (opts))
14527 opts->x_target_flags |= MASK_HARD_DFP;
14529 opts->x_target_flags &= ~MASK_HARD_DFP;
14532 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14534 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14536 if (!TARGET_CPU_DFP_P (opts))
14537 error ("hardware decimal floating point instructions"
14538 " not available on %s",
14539 processor_table[(int)opts->x_s390_arch].name);
14540 if (!TARGET_ZARCH_P (opts->x_target_flags))
14541 error ("hardware decimal floating point instructions"
14542 " not available in ESA/390 mode");
14545 opts->x_target_flags &= ~MASK_HARD_DFP;
14548 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14549 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14551 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14552 && TARGET_HARD_DFP_P (opts->x_target_flags))
14553 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14555 opts->x_target_flags &= ~MASK_HARD_DFP;
14558 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14559 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14560 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14561 error ("-mbackchain -mpacked-stack -mhard-float are not supported in combination");
14564 if (opts->x_s390_stack_size)
14566 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14567 error ("stack size must be greater than the stack guard value");
14568 else if (opts->x_s390_stack_size > 1 << 16)
14569 error ("stack size must not be greater than 64k");
14571 else if (opts->x_s390_stack_guard)
14572 error ("-mstack-guard implies use of -mstack-size");
14574 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14575 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14576 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14579 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14581 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14582 opts->x_param_values,
14583 opts_set->x_param_values);
14584 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14585 opts->x_param_values,
14586 opts_set->x_param_values);
14587 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14588 opts->x_param_values,
14589 opts_set->x_param_values);
14590 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14591 opts->x_param_values,
14592 opts_set->x_param_values);
14595 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14596 opts->x_param_values,
14597 opts_set->x_param_values);
14598 /* Values for loop prefetching. */
14599 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14600 opts->x_param_values,
14601 opts_set->x_param_values);
14602 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14603 opts->x_param_values,
14604 opts_set->x_param_values);
14605 /* s390 has more than 2 cache levels and their sizes are much larger.
14606 Since we are always running virtualized, assume that we only get a
14607 small part of the caches above L1. */
14608 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14609 opts->x_param_values,
14610 opts_set->x_param_values);
14611 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14612 opts->x_param_values,
14613 opts_set->x_param_values);
14614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14615 opts->x_param_values,
14616 opts_set->x_param_values);
14618 /* Use the alternative scheduling-pressure algorithm by default. */
14619 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14620 opts->x_param_values,
14621 opts_set->x_param_values);
14623 /* Call the target-specific restore function to do post-init work. At the moment,
14624 this just sets opts->x_s390_cost_pointer. */
14625 s390_function_specific_restore (opts, NULL);
14629 s390_option_override (void)
14632 cl_deferred_option *opt;
14633 vec<cl_deferred_option> *v =
14634 (vec<cl_deferred_option> *) s390_deferred_options;
14637 FOR_EACH_VEC_ELT (*v, i, opt)
14639 switch (opt->opt_index)
14641 case OPT_mhotpatch_:
14648 strncpy (s, opt->arg, 256);
14650 t = strchr (s, ',');
14655 val1 = integral_argument (s);
14656 val2 = integral_argument (t);
14663 if (val1 == -1 || val2 == -1)
14665 /* Argument is not a plain number. */
14666 error ("arguments to %qs should be non-negative integers",
14670 else if (val1 > s390_hotpatch_hw_max
14671 || val2 > s390_hotpatch_hw_max)
14673 error ("argument to %qs is too large (max. %d)",
14674 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14677 s390_hotpatch_hw_before_label = val1;
14678 s390_hotpatch_hw_after_label = val2;
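/* E.g. -mhotpatch=12,2 arrives here with opt->arg == "12,2" and ends up
   setting s390_hotpatch_hw_before_label to 12 and
   s390_hotpatch_hw_after_label to 2, both counted in halfwords.  */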
14682 gcc_unreachable ();
14686 /* Set up function hooks. */
14687 init_machine_status = s390_init_machine_status;
14689 s390_option_override_internal (true, &global_options, &global_options_set);
14691 /* Save the initial options in case the user makes function-specific option changes later. */
14693 target_option_default_node = build_target_option_node (&global_options);
14694 target_option_current_node = target_option_default_node;
14696 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14697 requires the arch flags to be evaluated already. Since prefetching
14698 is beneficial on s390, we enable it if available. */
14699 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14700 flag_prefetch_loop_arrays = 1;
14704 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14705 debuggers do not yet support DWARF 3/4. */
14706 if (!global_options_set.x_dwarf_strict)
14708 if (!global_options_set.x_dwarf_version)
14712 /* Register a target-specific optimization-and-lowering pass
14713 to run immediately before prologue and epilogue generation.
14715 Registering the pass must be done at start up. It's
14716 convenient to do it here. */
14717 opt_pass *new_pass = new pass_s390_early_mach (g);
14718 struct register_pass_info insert_pass_s390_early_mach =
14720 new_pass, /* pass */
14721 "pro_and_epilogue", /* reference_pass_name */
14722 1, /* ref_pass_instance_number */
14723 PASS_POS_INSERT_BEFORE /* po_op */
14725 register_pass (&insert_pass_s390_early_mach);
14728 #if S390_USE_TARGET_ATTRIBUTE
14729 /* Inner function to process the attribute((target(...))), take an argument and
14730 set the current options from the argument. If we have a list, recursively go over the arguments. */
14734 s390_valid_target_attribute_inner_p (tree args,
14735 struct gcc_options *opts,
14736 struct gcc_options *new_opts_set,
14742 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14743 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14744 static const struct
14746 const char *string;
14750 int only_as_pragma;
14753 S390_ATTRIB ("arch=", OPT_march_, 1),
14754 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14755 /* uinteger options */
14756 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14757 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14758 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14759 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14761 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14762 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14763 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14764 S390_ATTRIB ("htm", OPT_mhtm, 0),
14765 S390_ATTRIB ("vx", OPT_mvx, 0),
14766 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14767 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14768 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14769 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14770 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14771 /* boolean options */
14772 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14777 /* If this is a list, recurse to get the options. */
14778 if (TREE_CODE (args) == TREE_LIST)
14781 int num_pragma_values;
14784 /* Note: attribs.c:decl_attributes prepends the values from
14785 current_target_pragma to the list of target attributes. To determine
14786 whether we're looking at a value of the attribute or the pragma we
14787 assume that the first [list_length (current_target_pragma)] values in
14788 the list are the values from the pragma. */
14789 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14790 ? list_length (current_target_pragma) : 0;
14791 for (i = 0; args; args = TREE_CHAIN (args), i++)
14795 is_pragma = (force_pragma || i < num_pragma_values);
14796 if (TREE_VALUE (args)
14797 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14798 opts, new_opts_set,
14807 else if (TREE_CODE (args) != STRING_CST)
14809 error ("attribute %<target%> argument not a string");
14813 /* Handle multiple arguments separated by commas. */
14814 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14816 while (next_optstr && *next_optstr != '\0')
14818 char *p = next_optstr;
14820 char *comma = strchr (next_optstr, ',');
14821 size_t len, opt_len;
14827 enum cl_var_type var_type;
14833 len = comma - next_optstr;
14834 next_optstr = comma + 1;
14839 next_optstr = NULL;
14842 /* Recognize no-xxx. */
14843 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14852 /* Find the option. */
14855 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14857 opt_len = attrs[i].len;
14858 if (ch == attrs[i].string[0]
14859 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14860 && memcmp (p, attrs[i].string, opt_len) == 0)
14862 opt = attrs[i].opt;
14863 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14865 mask = cl_options[opt].var_value;
14866 var_type = cl_options[opt].var_type;
14872 /* Process the option. */
14875 error ("attribute(target(\"%s\")) is unknown", orig_p);
14878 else if (attrs[i].only_as_pragma && !force_pragma)
14880 /* Value is not allowed for the target attribute. */
14881 error ("value %qs is not supported by attribute %<target%>",
14886 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14888 if (var_type == CLVC_BIT_CLEAR)
14889 opt_set_p = !opt_set_p;
14892 opts->x_target_flags |= mask;
14894 opts->x_target_flags &= ~mask;
14895 new_opts_set->x_target_flags |= mask;
14898 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14902 if (cl_options[opt].cl_uinteger)
14904 /* Unsigned integer argument. Code based on the function
14905 decode_cmdline_option () in opts-common.c. */
14906 value = integral_argument (p + opt_len);
14909 value = (opt_set_p) ? 1 : 0;
14913 struct cl_decoded_option decoded;
14915 /* Value range check; only implemented for numeric and boolean
14916 options at the moment. */
14917 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14918 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14919 set_option (opts, new_opts_set, opt, value,
14920 p + opt_len, DK_UNSPECIFIED, input_location,
14925 error ("attribute(target(\"%s\")) is unknown", orig_p);
14930 else if (cl_options[opt].var_type == CLVC_ENUM)
14935 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14937 set_option (opts, new_opts_set, opt, value,
14938 p + opt_len, DK_UNSPECIFIED, input_location,
14942 error ("attribute(target(\"%s\")) is unknown", orig_p);
14948 gcc_unreachable ();
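/* As an example, attribute((target("no-vx,arch=z13"))) reaches the
   function above as the string "no-vx,arch=z13": it is split at the
   comma, "no-vx" matches the OPT_mvx entry with opt_set_p inverted by
   the "no-" prefix, and "arch=z13" matches the OPT_march_ entry, which
   takes "z13" as its argument.  */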
14953 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14956 s390_valid_target_attribute_tree (tree args,
14957 struct gcc_options *opts,
14958 const struct gcc_options *opts_set,
14961 tree t = NULL_TREE;
14962 struct gcc_options new_opts_set;
14964 memset (&new_opts_set, 0, sizeof (new_opts_set));
14966 /* Process each of the options on the chain. */
14967 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14969 return error_mark_node;
14971 /* If some option was set (even if it has not changed), rerun
14972 s390_option_override_internal, and then save the options away. */
14973 if (new_opts_set.x_target_flags
14974 || new_opts_set.x_s390_arch
14975 || new_opts_set.x_s390_tune
14976 || new_opts_set.x_s390_stack_guard
14977 || new_opts_set.x_s390_stack_size
14978 || new_opts_set.x_s390_branch_cost
14979 || new_opts_set.x_s390_warn_framesize
14980 || new_opts_set.x_s390_warn_dynamicstack_p)
14982 const unsigned char *src = (const unsigned char *)opts_set;
14983 unsigned char *dest = (unsigned char *)&new_opts_set;
14986 /* Merge the original option flags into the new ones. */
14987 for (i = 0; i < sizeof(*opts_set); i++)
14990 /* Do any overrides, such as arch=xxx or tune=xxx support. */
14991 s390_option_override_internal (false, opts, &new_opts_set);
14992 /* Save the current options unless we are validating options for #pragma. */
14994 t = build_target_option_node (opts);
14999 /* Hook to validate attribute((target("string"))). */
15002 s390_valid_target_attribute_p (tree fndecl,
15003 tree ARG_UNUSED (name),
15005 int ARG_UNUSED (flags))
15007 struct gcc_options func_options;
15008 tree new_target, new_optimize;
15011 /* attribute((target("default"))) does nothing, beyond
15012 affecting multi-versioning. */
15013 if (TREE_VALUE (args)
15014 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15015 && TREE_CHAIN (args) == NULL_TREE
15016 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15019 tree old_optimize = build_optimization_node (&global_options);
15021 /* Get the optimization options of the current function. */
15022 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15024 if (!func_optimize)
15025 func_optimize = old_optimize;
15027 /* Init func_options. */
15028 memset (&func_options, 0, sizeof (func_options));
15029 init_options_struct (&func_options, NULL);
15030 lang_hooks.init_options_struct (&func_options);
15032 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15034 /* Initialize func_options to the default before its target options can be set. */
15036 cl_target_option_restore (&func_options,
15037 TREE_TARGET_OPTION (target_option_default_node));
15039 new_target = s390_valid_target_attribute_tree (args, &func_options,
15040 &global_options_set,
15042 current_target_pragma));
15043 new_optimize = build_optimization_node (&func_options);
15044 if (new_target == error_mark_node)
15046 else if (fndecl && new_target)
15048 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15049 if (old_optimize != new_optimize)
15050 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15055 /* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl cache. */
15059 s390_activate_target_options (tree new_tree)
15061 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15062 if (TREE_TARGET_GLOBALS (new_tree))
15063 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15064 else if (new_tree == target_option_default_node)
15065 restore_target_globals (&default_target_globals);
15067 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15068 s390_previous_fndecl = NULL_TREE;
15071 /* Establish appropriate back-end context for processing the function
15072 FNDECL. The argument might be NULL to indicate processing at top
15073 level, outside of any function scope. */
15075 s390_set_current_function (tree fndecl)
15077 /* Only change the context if the function changes. This hook is called
15078 several times in the course of compiling a function, and we don't want to
15079 slow things down too much or call target_reinit when it isn't safe. */
15080 if (fndecl == s390_previous_fndecl)
15084 if (s390_previous_fndecl == NULL_TREE)
15085 old_tree = target_option_current_node;
15086 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15087 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15089 old_tree = target_option_default_node;
15091 if (fndecl == NULL_TREE)
15093 if (old_tree != target_option_current_node)
15094 s390_activate_target_options (target_option_current_node);
15098 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15099 if (new_tree == NULL_TREE)
15100 new_tree = target_option_default_node;
15102 if (old_tree != new_tree)
15103 s390_activate_target_options (new_tree);
15104 s390_previous_fndecl = fndecl;
15108 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15111 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15112 unsigned int align ATTRIBUTE_UNUSED,
15113 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15114 bool speed_p ATTRIBUTE_UNUSED)
15116 return (size == 1 || size == 2
15117 || size == 4 || (TARGET_ZARCH && size == 8));
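/* In other words, fixed-size block operations of 1, 2 or 4 bytes (and
   8 bytes under z/Architecture) are expanded piecewise inline; all
   other sizes are left to the regular block-operation expanders.  */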
15120 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15123 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15125 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15126 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15127 tree call_efpc = build_call_expr (efpc, 0);
15128 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15130 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15131 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15132 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15133 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15134 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15135 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15137 /* Generates the equivalent of feholdexcept (&fenv_var)
15139 fenv_var = __builtin_s390_efpc ();
15140 __builtin_s390_sfpc (fenv_var & mask) */
15141 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15143 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15144 build_int_cst (unsigned_type_node,
15145 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15146 FPC_EXCEPTION_MASK)));
15147 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15148 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15150 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15152 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15153 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15154 build_int_cst (unsigned_type_node,
15155 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15156 *clear = build_call_expr (sfpc, 1, new_fpc);
15158 /* Generates the equivalent of feupdateenv (fenv_var)
15160 old_fpc = __builtin_s390_efpc ();
15161 __builtin_s390_sfpc (fenv_var);
15162 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15164 old_fpc = create_tmp_var_raw (unsigned_type_node);
15165 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15166 old_fpc, call_efpc);
15168 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15170 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15171 build_int_cst (unsigned_type_node,
15173 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15174 build_int_cst (unsigned_type_node,
15176 tree atomic_feraiseexcept
15177 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15178 raise_old_except = build_call_expr (atomic_feraiseexcept,
15179 1, raise_old_except);
15181 *update = build2 (COMPOUND_EXPR, void_type_node,
15182 build2 (COMPOUND_EXPR, void_type_node,
15183 store_old_fpc, set_new_fpc),
15186 #undef FPC_EXCEPTION_MASK
15187 #undef FPC_FLAGS_MASK
15188 #undef FPC_DXC_MASK
15189 #undef FPC_EXCEPTION_MASK_SHIFT
15190 #undef FPC_FLAGS_SHIFT
15191 #undef FPC_DXC_SHIFT
15194 /* Return the vector mode to be used for inner mode MODE when doing vectorization. */
15196 static machine_mode
15197 s390_preferred_simd_mode (machine_mode mode)
15217 /* Our hardware does not require vectors to be strictly aligned. */
15219 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15220 const_tree type ATTRIBUTE_UNUSED,
15221 int misalignment ATTRIBUTE_UNUSED,
15222 bool is_packed ATTRIBUTE_UNUSED)
15227 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15231 /* The vector ABI requires vector types to be aligned on an 8 byte
15232 boundary (our stack alignment). However, we allow the user to
15233 override this, although doing so definitely breaks the ABI. */
15234 static HOST_WIDE_INT
15235 s390_vector_alignment (const_tree type)
15237 if (!TARGET_VX_ABI)
15238 return default_vector_alignment (type);
15240 if (TYPE_USER_ALIGN (type))
15241 return TYPE_ALIGN (type);
15243 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
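/* Worked example: a 16-byte (128-bit) vector type gets
   MIN (64, 128) == 64 bits, i.e. 8-byte alignment, under the vector
   ABI, while a user-specified alignment attribute is honored as-is.  */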
15246 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15247 /* Implement TARGET_ASM_FILE_START. */
15249 s390_asm_file_start (void)
15251 default_file_start ();
15252 s390_asm_output_machine_for_arch (asm_out_file);
15256 /* Implement TARGET_ASM_FILE_END. */
15258 s390_asm_file_end (void)
15260 #ifdef HAVE_AS_GNU_ATTRIBUTE
15261 varpool_node *vnode;
15262 cgraph_node *cnode;
15264 FOR_EACH_VARIABLE (vnode)
15265 if (TREE_PUBLIC (vnode->decl))
15266 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15268 FOR_EACH_FUNCTION (cnode)
15269 if (TREE_PUBLIC (cnode->decl))
15270 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15273 if (s390_vector_abi != 0)
15274 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15277 file_end_indicate_exec_stack ();
15279 if (flag_split_stack)
15280 file_end_indicate_split_stack ();
15283 /* Return true if TYPE is a vector bool type. */
15285 s390_vector_bool_type_p (const_tree type)
15287 return TYPE_VECTOR_OPAQUE (type);
15290 /* Return the diagnostic message string if the binary operation OP is
15291 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15293 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15295 bool bool1_p, bool2_p;
15299 machine_mode mode1, mode2;
15301 if (!TARGET_ZVECTOR)
15304 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15307 bool1_p = s390_vector_bool_type_p (type1);
15308 bool2_p = s390_vector_bool_type_p (type2);
15310 /* Mixing signed and unsigned types is forbidden for all binary operators. */
15312 if (!bool1_p && !bool2_p
15313 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15314 return N_("types differ in signedness");
15316 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15317 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15318 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15319 || op == ROUND_DIV_EXPR);
15320 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15321 || op == EQ_EXPR || op == NE_EXPR);
15323 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15324 return N_("binary operator does not support two vector bool operands");
15326 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15327 return N_("binary operator does not support vector bool operand");
15329 mode1 = TYPE_MODE (type1);
15330 mode2 = TYPE_MODE (type2);
15332 if (bool1_p != bool2_p && plusminus_p
15333 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15334 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15335 return N_("binary operator does not support mixing vector "
15336 "bool with floating point vector operands");
15341 /* Implement TARGET_C_EXCESS_PRECISION.
15343 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15344 double on s390, causing operations on float_t to operate in a higher
15345 precision than is necessary. However, it is not the case that SFmode
15346 operations have implicit excess precision, and we generate more optimal
15347 code if we let the compiler know no implicit extra precision is added.
15349 That means when we are compiling with -fexcess-precision=fast, the value
15350 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15351 float_t (though they would be correct for -fexcess-precision=standard).
15353 A complete fix would modify glibc to remove the unnecessary typedef
15354 of float_t to double. */
15356 static enum flt_eval_method
15357 s390_excess_precision (enum excess_precision_type type)
15361 case EXCESS_PRECISION_TYPE_IMPLICIT:
15362 case EXCESS_PRECISION_TYPE_FAST:
15363 /* The fastest type to promote to will always be the native type,
15364 whether that occurs with implicit excess precision or otherwise. */
15366 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15367 case EXCESS_PRECISION_TYPE_STANDARD:
15368 /* Otherwise, when we are in a standards compliant mode, to
15369 ensure consistency with the implementation in glibc, report that
15370 float is evaluated to the range and precision of double. */
15371 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15373 gcc_unreachable ();
15375 return FLT_EVAL_METHOD_UNPREDICTABLE;
15378 /* Initialize GCC target structure. */
15380 #undef TARGET_ASM_ALIGNED_HI_OP
15381 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15382 #undef TARGET_ASM_ALIGNED_DI_OP
15383 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15384 #undef TARGET_ASM_INTEGER
15385 #define TARGET_ASM_INTEGER s390_assemble_integer
15387 #undef TARGET_ASM_OPEN_PAREN
15388 #define TARGET_ASM_OPEN_PAREN ""
15390 #undef TARGET_ASM_CLOSE_PAREN
15391 #define TARGET_ASM_CLOSE_PAREN ""
15393 #undef TARGET_OPTION_OVERRIDE
15394 #define TARGET_OPTION_OVERRIDE s390_option_override
15396 #ifdef TARGET_THREAD_SSP_OFFSET
15397 #undef TARGET_STACK_PROTECT_GUARD
15398 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15401 #undef TARGET_ENCODE_SECTION_INFO
15402 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15404 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15405 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15408 #undef TARGET_HAVE_TLS
15409 #define TARGET_HAVE_TLS true
15411 #undef TARGET_CANNOT_FORCE_CONST_MEM
15412 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15414 #undef TARGET_DELEGITIMIZE_ADDRESS
15415 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15417 #undef TARGET_LEGITIMIZE_ADDRESS
15418 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15420 #undef TARGET_RETURN_IN_MEMORY
15421 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15423 #undef TARGET_INIT_BUILTINS
15424 #define TARGET_INIT_BUILTINS s390_init_builtins
15425 #undef TARGET_EXPAND_BUILTIN
15426 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15427 #undef TARGET_BUILTIN_DECL
15428 #define TARGET_BUILTIN_DECL s390_builtin_decl
15430 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15431 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15433 #undef TARGET_ASM_OUTPUT_MI_THUNK
15434 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15435 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15436 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15438 #undef TARGET_C_EXCESS_PRECISION
15439 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
15441 #undef TARGET_SCHED_ADJUST_PRIORITY
15442 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15443 #undef TARGET_SCHED_ISSUE_RATE
15444 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15445 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15446 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15448 #undef TARGET_SCHED_VARIABLE_ISSUE
15449 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15450 #undef TARGET_SCHED_REORDER
15451 #define TARGET_SCHED_REORDER s390_sched_reorder
15452 #undef TARGET_SCHED_INIT
15453 #define TARGET_SCHED_INIT s390_sched_init
15455 #undef TARGET_CANNOT_COPY_INSN_P
15456 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15457 #undef TARGET_RTX_COSTS
15458 #define TARGET_RTX_COSTS s390_rtx_costs
15459 #undef TARGET_ADDRESS_COST
15460 #define TARGET_ADDRESS_COST s390_address_cost
15461 #undef TARGET_REGISTER_MOVE_COST
15462 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15463 #undef TARGET_MEMORY_MOVE_COST
15464 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15465 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15466 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15467 s390_builtin_vectorization_cost
15469 #undef TARGET_MACHINE_DEPENDENT_REORG
15470 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15472 #undef TARGET_VALID_POINTER_MODE
15473 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15475 #undef TARGET_BUILD_BUILTIN_VA_LIST
15476 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15477 #undef TARGET_EXPAND_BUILTIN_VA_START
15478 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15479 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15480 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15482 #undef TARGET_PROMOTE_FUNCTION_MODE
15483 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15484 #undef TARGET_PASS_BY_REFERENCE
15485 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15488 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15489 #undef TARGET_FUNCTION_ARG
15490 #define TARGET_FUNCTION_ARG s390_function_arg
15491 #undef TARGET_FUNCTION_ARG_ADVANCE
15492 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15493 #undef TARGET_FUNCTION_VALUE
15494 #define TARGET_FUNCTION_VALUE s390_function_value
15495 #undef TARGET_LIBCALL_VALUE
15496 #define TARGET_LIBCALL_VALUE s390_libcall_value
15497 #undef TARGET_STRICT_ARGUMENT_NAMING
15498 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15500 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15501 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15503 #undef TARGET_FIXED_CONDITION_CODE_REGS
15504 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15506 #undef TARGET_CC_MODES_COMPATIBLE
15507 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15509 #undef TARGET_INVALID_WITHIN_DOLOOP
15510 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15513 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15514 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15517 #undef TARGET_DWARF_FRAME_REG_MODE
15518 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15520 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15521 #undef TARGET_MANGLE_TYPE
15522 #define TARGET_MANGLE_TYPE s390_mangle_type
15525 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15526 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15528 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15529 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15531 #undef TARGET_PREFERRED_RELOAD_CLASS
15532 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15534 #undef TARGET_SECONDARY_RELOAD
15535 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15537 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15538 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15540 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15541 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15543 #undef TARGET_LEGITIMATE_ADDRESS_P
15544 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15546 #undef TARGET_LEGITIMATE_CONSTANT_P
15547 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15549 #undef TARGET_LRA_P
15550 #define TARGET_LRA_P s390_lra_p
15552 #undef TARGET_CAN_ELIMINATE
15553 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15555 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15556 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15558 #undef TARGET_LOOP_UNROLL_ADJUST
15559 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15561 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15562 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15563 #undef TARGET_TRAMPOLINE_INIT
15564 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15566 #undef TARGET_UNWIND_WORD_MODE
15567 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15569 #undef TARGET_CANONICALIZE_COMPARISON
15570 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15572 #undef TARGET_HARD_REGNO_SCRATCH_OK
15573 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15575 #undef TARGET_ATTRIBUTE_TABLE
15576 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15578 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15579 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15581 #undef TARGET_SET_UP_BY_PROLOGUE
15582 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15584 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15585 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15587 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15588 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15589 s390_use_by_pieces_infrastructure_p
15591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15594 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15595 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15597 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15598 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15600 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15601 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15603 #undef TARGET_VECTOR_ALIGNMENT
15604 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15606 #undef TARGET_INVALID_BINARY_OP
15607 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15609 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15610 #undef TARGET_ASM_FILE_START
15611 #define TARGET_ASM_FILE_START s390_asm_file_start
15614 #undef TARGET_ASM_FILE_END
15615 #define TARGET_ASM_FILE_END s390_asm_file_end
15617 #if S390_USE_TARGET_ATTRIBUTE
15618 #undef TARGET_SET_CURRENT_FUNCTION
15619 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15621 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15622 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15625 #undef TARGET_OPTION_RESTORE
15626 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15628 struct gcc_target targetm = TARGET_INITIALIZER;
15630 #include "gt-s390.h"