/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of multiplication in TFmode.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
static const struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),   /* M */
  COSTS_N_INSNS (10),  /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (4),   /* MHI */
  COSTS_N_INSNS (5),   /* ML */
  COSTS_N_INSNS (5),   /* MR */
  COSTS_N_INSNS (4),   /* MS */
  COSTS_N_INSNS (15),  /* MSG */
  COSTS_N_INSNS (7),   /* MSGF */
  COSTS_N_INSNS (7),   /* MSGFR */
  COSTS_N_INSNS (10),  /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (7),   /* multiplication in DFmode */
  COSTS_N_INSNS (13),  /* MXBR */
  COSTS_N_INSNS (136), /* SQXBR */
  COSTS_N_INSNS (44),  /* SQDBR */
  COSTS_N_INSNS (35),  /* SQEBR */
  COSTS_N_INSNS (18),  /* MADBR */
  COSTS_N_INSNS (13),  /* MAEBR */
  COSTS_N_INSNS (134), /* DXBR */
  COSTS_N_INSNS (30),  /* DDBR */
  COSTS_N_INSNS (27),  /* DEBR */
  COSTS_N_INSNS (220), /* DLGR */
  COSTS_N_INSNS (34),  /* DLR */
  COSTS_N_INSNS (34),  /* DR */
  COSTS_N_INSNS (32),  /* DSGFR */
  COSTS_N_INSNS (32),  /* DSGR */
};

static const struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),   /* M */
  COSTS_N_INSNS (2),   /* MGHI */
  COSTS_N_INSNS (2),   /* MH */
  COSTS_N_INSNS (2),   /* MHI */
  COSTS_N_INSNS (4),   /* ML */
  COSTS_N_INSNS (4),   /* MR */
  COSTS_N_INSNS (5),   /* MS */
  COSTS_N_INSNS (6),   /* MSG */
  COSTS_N_INSNS (4),   /* MSGF */
  COSTS_N_INSNS (4),   /* MSGFR */
  COSTS_N_INSNS (4),   /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (28),  /* MXBR */
  COSTS_N_INSNS (130), /* SQXBR */
  COSTS_N_INSNS (66),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (60),  /* DXBR */
  COSTS_N_INSNS (40),  /* DDBR */
  COSTS_N_INSNS (26),  /* DEBR */
  COSTS_N_INSNS (176), /* DLGR */
  COSTS_N_INSNS (31),  /* DLR */
  COSTS_N_INSNS (31),  /* DR */
  COSTS_N_INSNS (31),  /* DSGFR */
  COSTS_N_INSNS (31),  /* DSGR */
};

static const struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),   /* M */
  COSTS_N_INSNS (2),   /* MGHI */
  COSTS_N_INSNS (2),   /* MH */
  COSTS_N_INSNS (2),   /* MHI */
  COSTS_N_INSNS (4),   /* ML */
  COSTS_N_INSNS (4),   /* MR */
  COSTS_N_INSNS (5),   /* MS */
  COSTS_N_INSNS (6),   /* MSG */
  COSTS_N_INSNS (4),   /* MSGF */
  COSTS_N_INSNS (4),   /* MSGFR */
  COSTS_N_INSNS (4),   /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (28),  /* MXBR */
  COSTS_N_INSNS (130), /* SQXBR */
  COSTS_N_INSNS (66),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (60),  /* DXBR */
  COSTS_N_INSNS (40),  /* DDBR */
  COSTS_N_INSNS (26),  /* DEBR */
  COSTS_N_INSNS (30),  /* DLGR */
  COSTS_N_INSNS (23),  /* DLR */
  COSTS_N_INSNS (23),  /* DR */
  COSTS_N_INSNS (24),  /* DSGFR */
  COSTS_N_INSNS (24),  /* DSGR */
};

static const struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),  /* M */
  COSTS_N_INSNS (10),  /* MGHI */
  COSTS_N_INSNS (10),  /* MH */
  COSTS_N_INSNS (10),  /* MHI */
  COSTS_N_INSNS (10),  /* ML */
  COSTS_N_INSNS (10),  /* MR */
  COSTS_N_INSNS (10),  /* MS */
  COSTS_N_INSNS (10),  /* MSG */
  COSTS_N_INSNS (10),  /* MSGF */
  COSTS_N_INSNS (10),  /* MSGFR */
  COSTS_N_INSNS (10),  /* MSGR */
  COSTS_N_INSNS (10),  /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (50),  /* MXBR */
  COSTS_N_INSNS (120), /* SQXBR */
  COSTS_N_INSNS (52),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (111), /* DXBR */
  COSTS_N_INSNS (39),  /* DDBR */
  COSTS_N_INSNS (32),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR */
  COSTS_N_INSNS (71),  /* DLR */
  COSTS_N_INSNS (71),  /* DR */
  COSTS_N_INSNS (71),  /* DSGFR */
  COSTS_N_INSNS (71),  /* DSGR */
};

static const struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),   /* M */
  COSTS_N_INSNS (5),   /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (5),   /* MHI */
  COSTS_N_INSNS (7),   /* ML */
  COSTS_N_INSNS (7),   /* MR */
  COSTS_N_INSNS (6),   /* MS */
  COSTS_N_INSNS (8),   /* MSG */
  COSTS_N_INSNS (6),   /* MSGF */
  COSTS_N_INSNS (6),   /* MSGFR */
  COSTS_N_INSNS (8),   /* MSGR */
  COSTS_N_INSNS (6),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (40),  /* MXBR B+40 */
  COSTS_N_INSNS (100), /* SQXBR B+100 */
  COSTS_N_INSNS (42),  /* SQDBR B+42 */
  COSTS_N_INSNS (28),  /* SQEBR B+28 */
  COSTS_N_INSNS (1),   /* MADBR B */
  COSTS_N_INSNS (1),   /* MAEBR B */
  COSTS_N_INSNS (101), /* DXBR B+101 */
  COSTS_N_INSNS (29),  /* DDBR */
  COSTS_N_INSNS (22),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR cracked */
  COSTS_N_INSNS (160), /* DLR cracked */
  COSTS_N_INSNS (160), /* DR expanded */
  COSTS_N_INSNS (160), /* DSGFR cracked */
  COSTS_N_INSNS (160), /* DSGR cracked */
};

static const struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),   /* M */
  COSTS_N_INSNS (5),   /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (5),   /* MHI */
  COSTS_N_INSNS (7),   /* ML */
  COSTS_N_INSNS (7),   /* MR */
  COSTS_N_INSNS (6),   /* MS */
  COSTS_N_INSNS (8),   /* MSG */
  COSTS_N_INSNS (6),   /* MSGF */
  COSTS_N_INSNS (6),   /* MSGFR */
  COSTS_N_INSNS (8),   /* MSGR */
  COSTS_N_INSNS (6),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (40),  /* MXBR B+40 */
  COSTS_N_INSNS (100), /* SQXBR B+100 */
  COSTS_N_INSNS (42),  /* SQDBR B+42 */
  COSTS_N_INSNS (28),  /* SQEBR B+28 */
  COSTS_N_INSNS (1),   /* MADBR B */
  COSTS_N_INSNS (1),   /* MAEBR B */
  COSTS_N_INSNS (131), /* DXBR B+131 */
  COSTS_N_INSNS (29),  /* DDBR */
  COSTS_N_INSNS (22),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR cracked */
  COSTS_N_INSNS (160), /* DLR cracked */
  COSTS_N_INSNS (160), /* DR expanded */
  COSTS_N_INSNS (160), /* DSGFR cracked */
  COSTS_N_INSNS (160), /* DSGR cracked */
};
static struct
{
  const char *const name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "arch12", PROCESSOR_ARCH12,      &zEC12_cost },
  { "native", PROCESSOR_NATIVE,      NULL }
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100
/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
     base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
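
/* Illustrative example (an addition for exposition, not from the
   original sources): in the assembler operand of
   "l %r1,100(%r2,%r3)" the displacement is 100, the index register
   is %r2 and the base register is %r3; each of the three components
   may also be omitted.  */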
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* First and last gpr for which slots in the register
     save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to:
       0 - does not need to be saved at all
      -1 - stack slot  */
#define SAVE_SLOT_NONE   0
#define SAVE_SLOT_STACK -1
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     the FPRs.  */
  bool tbegin_p;

  /* For -fsplit-stack support: A stack local which holds a pointer to
     the stack arguments for a function with a variable number of
     arguments.  This is set at the start of the function and is used
     to initialize the overflow_arg_area field of the va_list
     structure.  */
  rtx split_stack_varargs_pointer;
};
/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
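
/* Illustrative use of the two fpr_bitmap macros above (editor's
   example, not from the original sources; it assumes FPR0_REGNUM is
   16, so hard reg 24 is f8 as in the table further up): after
   cfun_set_fpr_save (24), bit 8 of fpr_bitmap is set and
   cfun_fpr_save_p (24) yields a nonzero value.  */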
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
         only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
        return;

      /* In arguments vector types > 16 are passed as before (GCC
         never enforced the bigger alignment for arguments which was
         required by the old vector ABI).  However, it might still be
         ABI relevant due to the changed alignment if it is a struct
         member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
        return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
         natural alignment there will never be ABI dependent padding
         in an array type.  That's why we do not set in_struct_p to
         true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
           arg_chain;
           arg_chain = TREE_CHAIN (arg_chain))
        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
        }
    }
}
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
                                       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = (!CONST_P) ? \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...) \
  if (s390_builtin_fn_types[INDEX] == NULL) \
    s390_builtin_fn_types[INDEX] = \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
    s390_builtin_decls[S390_BUILTIN_##NAME] = \
      add_builtin_function ("__builtin_" #NAME, \
                            s390_builtin_fn_types[FNTYPE], \
                            S390_BUILTIN_##NAME, \
                            BUILT_IN_MD, \
                            NULL, \
                            ATTRS);
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL) \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME, \
                            s390_builtin_fn_types[FNTYPE], \
                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
                            BUILT_IN_MD, \
                            NULL, \
                            0);
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to
   be passed as OP_FLAGS.  */

bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
        {
          error ("constant argument %d for builtin %qF is out of range (0.."
                 HOST_WIDE_INT_PRINT_UNSIGNED ")",
                 argnum, decl,
                 (HOST_WIDE_INT_1U << bitwidth) - 1);
          return false;
        }
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
          || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
          || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
        {
          error ("constant argument %d for builtin %qF is out of range ("
                 HOST_WIDE_INT_PRINT_DEC ".."
                 HOST_WIDE_INT_PRINT_DEC ")",
                 argnum, decl,
                 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
                 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
          return false;
        }
    }
  return true;
}
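
/* Worked example for the checks above (editor's note, assuming the
   O_U1..O_U32 and O_S2..O_S32 operand flags from s390-builtins.h):
   for O_U4 the bitwidth entry is 4, so a constant argument must lie
   in the range 0..15; analogously, O_S8 allows -128..127.  */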
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
               bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
        {
          error ("builtin %qF is not supported without -mhtm "
                 "(default with -march=zEC12 and higher).", fndecl);
          return const0_rtx;
        }
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
        {
          error ("builtin %qF requires -mvx "
                 "(default with -march=z13 and higher).", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_VXE) && !TARGET_VXE)
        {
          error ("builtin %qF requires arch12 or higher.", fndecl);
          return const0_rtx;
        }
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    gcc_unreachable ();
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
         saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
        cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");
  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
         arguments but an element selector.  So we have to also look
         at the vector return type when emitting the modulo
         operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
        last_vec_mode = insn_data[icode].operand[0].mode;
    }
  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
        return NULL_RTX;
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      if (O_IMM_P (op_flags)
          && TREE_CODE (arg) != INTEGER_CST)
        {
          error ("constant value required for builtin %qF argument %d",
                 fndecl, arity + 1);
          return const0_rtx;
        }

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
        return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
         is "convenient".  However, our checks below rely on this
         being the case.  */
      if (CONST_INT_P (op[arity])
          && SCALAR_INT_MODE_P (insn_op->mode)
          && GET_MODE (op[arity]) != insn_op->mode)
        op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
                                                 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
         the proper mode.  This allows us to use e.g. (match_operand
         "memory_operand"..) in the insn patterns instead of (mem
         (match_operand "address_operand)).  This is helpful for
         patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
          && insn_op->predicate != address_operand)
        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
        {
          gcc_assert (last_vec_mode != VOIDmode);
          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
                                             op[arity],
                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
                                             NULL_RTX, 1, OPTAB_DIRECT);
        }

      /* Record the vector mode used for an element selector.  This assumes:
         1. There is no builtin with two different vector modes and an element selector
         2. The element selector comes after the vector type it is referring to.
         This is currently true for all the builtins, but FIXME: we
         should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
        last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
        {
          arity++;
          continue;
        }

      if (MEM_P (op[arity])
          && insn_op->predicate == memory_operand
          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
        {
          op[arity] = replace_equiv_address (op[arity],
                                             copy_to_mode_reg (Pmode,
                                                               XEXP (op[arity], 0)));
        }
      /* Some of the builtins require different modes/types than the
         pattern in order to implement a specific API.  Instead of
         adding many expanders which do the mode change we do it here.
         E.g. s390_vec_add_u128 required to have vector unsigned char
         arguments is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
               && GET_MODE (op[arity]) != VOIDmode
               && GET_MODE (op[arity]) != insn_op->mode
               && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
                                                   GET_MODE (op[arity]), 0))
                   != NULL_RTX))
        {
          op[arity] = tmp_rtx;
        }
      else if (GET_MODE (op[arity]) == insn_op->mode
               || GET_MODE (op[arity]) == VOIDmode
               || (insn_op->predicate == address_operand
                   && GET_MODE (op[arity]) == Pmode))
        {
          /* An address_operand usually has VOIDmode in the expander
             so we cannot use this.  */
          machine_mode target_mode =
            (insn_op->predicate == address_operand
             ? Pmode : insn_op->mode);
          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
        }

      if (!insn_op->predicate (op[arity], insn_op->mode))
        {
          error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
          return const0_rtx;
        }
      arity++;
    }
  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it
   has an argument, whether the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
           || wi::gtu_p (expr, s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
           || wi::gtu_p (expr2, s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
             " non-negative integer constants or too large (max. %d)", name,
             s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
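
/* Illustrative use of the attribute validated above (example code,
   not part of this file); it requests one halfword of padding before
   and two halfwords after the function label:

     void foo (void) __attribute__ ((hotpatch (1, 2)));  */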
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
    default: break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
static int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
    case SFmode:
    case DFmode:
    case TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;

      return VOIDmode;

    default:
      return VOIDmode;
    }
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
              && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCZ1mode:
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
    case CCVEQmode:
    case CCVIHmode:
    case CCVIHUmode:
    case CCVFHmode:
    case CCVFHEmode:
      if (req_mode != set_mode)
        return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode
          && req_mode != CCZ1mode)
        return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
        return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

static bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx); it is false
   if the instruction cannot (TM).  */

static machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16) -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
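
/* Worked example (derived from the code above): for
   if ((a & (16 + 128)) == 16) we get OP1 == 144 and OP2 == 16;
   bit1 = exact_log2 (16) = 4, bit0 = exact_log2 (144 ^ 16) = 7, and
   since bit0 > bit1 the function returns CCT1mode.  */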
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
         -INT_MAX is used as parameter, which stays negative.  So
         we have an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP ((op0), 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = (HOST_WIDE_INT_1U << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }
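
  /* Worked example (derived from the code above): for a SImode
     ZERO_EXTRACT with LEN == 4 and POS == 8 (bit positions counted
     from the MSB), BLOCK becomes 0xf << (32 - 8 - 4) == 0x00f00000,
     i.e. the equivalent AND mask.  */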
  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }
  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case CCZmode:
        case CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand.  */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int) new_code;
        }
    }
  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;

      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == NE)
        new_code = reversed_comparison_code_parts (GET_CODE (*op0),
                                                   XEXP (*op0, 0),
                                                   XEXP (*op0, 1), NULL);
      else
        new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
        {
          *code = (int) new_code;
          *op1 = XEXP (*op0, 1);
          *op0 = XEXP (*op0, 0);
        }
    }
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
         compare_and_swap pattern already computed the result and the
         machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  != VOIDmode);
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx, machine_mode ccmode)
{
  rtx cc;

  cc = gen_rtx_REG (ccmode, CC_REGNUM);
  switch (GET_MODE (mem))
    {
    case SImode:
      emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    case DImode:
      emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    case TImode:
      emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    default:
      gcc_unreachable ();
    }
  return s390_emit_compare (code, cc, const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
              || (GET_MODE (XEXP (code, 0)) == CCRAWmode
                  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC1;
        case NE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT2mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC2;
        case NE: return CC0 | CC1 | CC3;
        default: return -1;
        }
      break;

    case CCT3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC3;
        case NE: return CC0 | CC1 | CC2;
        default: return -1;
        }
      break;

    case CCLmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        default: return -1;
        }
      break;

    case CCL1mode:
      switch (GET_CODE (code))
        {
        case LTU: return CC2 | CC3;  /* carry */
        case GEU: return CC0 | CC1;  /* no carry */
        default: return -1;
        }
      break;

    case CCL2mode:
      switch (GET_CODE (code))
        {
        case GTU: return CC0 | CC1;  /* borrow */
        case LEU: return CC2 | CC3;  /* no borrow */
        default: return -1;
        }
      break;

    case CCL3mode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0 | CC2;
        case NE:  return CC1 | CC3;
        case LTU: return CC1;
        case GTU: return CC3;
        case LEU: return CC1 | CC2;
        case GEU: return CC2 | CC3;
        default: return -1;
        }
      break;

    case CCUmode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0;
        case NE:  return CC1 | CC2 | CC3;
        case LTU: return CC1;
        case GTU: return CC2;
        case LEU: return CC0 | CC1;
        case GEU: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCURmode:
      switch (GET_CODE (code))
        {
        case EQ:  return CC0;
        case NE:  return CC2 | CC1 | CC3;
        case LTU: return CC2;
        case GTU: return CC1;
        case LEU: return CC0 | CC2;
        case GEU: return CC0 | CC1;
        default: return -1;
        }
      break;

    case CCAPmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1 | CC3;
        case GT: return CC2;
        case LE: return CC0 | CC1 | CC3;
        case GE: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCANmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2 | CC3;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCSmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LT:        return CC1;
        case GT:        return CC2;
        case LE:        return CC0 | CC1;
        case GE:        return CC0 | CC2;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC1 | CC2;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC1 | CC3;
        case UNGT:      return CC2 | CC3;
        case UNLE:      return CC0 | CC1 | CC3;
        case UNGE:      return CC0 | CC2 | CC3;
        case LTGT:      return CC1 | CC2;
        default: return -1;
        }
      break;

    case CCSRmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC2 | CC1 | CC3;
        case LT:        return CC2;
        case GT:        return CC1;
        case LE:        return CC0 | CC2;
        case GE:        return CC0 | CC1;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC2 | CC1;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC2 | CC3;
        case UNGT:      return CC1 | CC3;
        case UNLE:      return CC0 | CC2 | CC3;
        case UNGE:      return CC0 | CC1 | CC3;
        case LTGT:      return CC2 | CC1;
        default: return -1;
        }
      break;

    /* Vector comparison modes.  */
    /* CC2 will never be set.  It however is part of the negated
       masks.  */
    case CCVIALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE: return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT: return CC3 | CC1 | CC2;
        default: return -1;
        }

    case CCVIANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE: return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT: return CC3 | CC2;
        default: return -1;
        }

    case CCVFALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE: return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case UNLE:
        case UNLT: return CC3 | CC1 | CC2;
        default: return -1;
        }

    case CCVFANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE: return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case UNLE:
        case UNLT: return CC3 | CC2;
        default: return -1;
        }

    case CCRAWmode:
      switch (GET_CODE (code))
        {
        case EQ:
          return INTVAL (XEXP (code, 1));
        case NE:
          return (INTVAL (XEXP (code, 1))) ^ 0xf;
        default:
          return -1;
        }

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:       return CC0;
    case NE:       return CC1 | CC2;
    case LT: case LTU: return CC1;
    case GT: case GTU: return CC2;
    case LE: case LEU: return CC0 | CC1;
    case GE: case GEU: return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
          || (GET_MODE (XEXP (code, 0)) == CCRAWmode
              && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
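
/* Worked example (derived from the table above): an EQ test on
   CCZmode yields mask CC0 == 8, i.e. mnemonic "e"; with INV set the
   mask is complemented to 7, yielding the inverted mnemonic "ne".  */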
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
        value = UINTVAL (op);
      else
        value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
        return value & part_mask;
    }

  gcc_unreachable ();
}
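
/* Worked example (derived from the loop above): for
   s390_extract_part (GEN_INT (0x12340000), HImode, 0) the low
   halfword equals the default 0, so the loop shifts once more and
   returns the non-default part 0x1234.  */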
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
                  machine_mode mode,
                  machine_mode part_mode,
                  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
        value = UINTVAL (op);
      else
        value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
        {
          if (part != -1)
            return -1;
          else
            part = i;
        }
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
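
/* Worked example (derived from the code above): for OP == 0xffff0000
   with MODE == SImode, PART_MODE == HImode and DEF == 0 only the high
   halfword differs from DEF; it is found at i == 1, so the function
   returns n_parts - 1 - part == 0, i.e. the most significant part.  */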
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
                                  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
        /* Look for the rightmost bit of a contiguous range of ones.  */
        if (bitmask & in)
          /* Found it.  */
          end = start;
      }
    else
      {
        /* Look for the first zero bit after the range of ones.  */
        if (! (bitmask & in))
          /* Found it.  */
          break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
              & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));

      /* There are more bits set beyond the first range of one bits.  */
      if (mask & in)
        return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
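
/* Stand-alone sketch (illustrative only; a hypothetical helper that
   is not part of the original sources and is not used by the port):
   the test performed by the loop above can also be expressed without
   a loop.  A nonzero X consists of a single contiguous run of one
   bits iff adding its lowest set bit to it clears the whole run.  */
static inline bool
s390_contiguous_ones_sketch_p (unsigned HOST_WIDE_INT x)
{
  /* X & -X isolates the lowest set bit; the addition must carry past
     the run, leaving no bit of X set.  */
  return x != 0 && ((x + (x & -x)) & x) == 0;
}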
/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
                           int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a contiguous bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (!wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/STOP give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b)
    {
      if (start)
        {
          *start -= (HOST_BITS_PER_WIDE_INT - size);
          *end -= (HOST_BITS_PER_WIDE_INT - size);
        }
      return true;
    }
  else
    return false;
}
/* Return true if C consists only of byte chunks being either 0 or
   0xff.  If MASK is !=NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
        return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
        {
          if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
            return false;
          tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
          c = c >> BITS_PER_UNIT;
        }
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}
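
/* Worked example (derived from the loop above): a V4SImode constant
   vector with all elements 0x00ff00ff consists only of 0x00 and 0xff
   bytes, so the function returns true and sets *MASK to 0x5555.  */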
2403 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2404 equivalent to a shift followed by the AND. In particular, CONTIG
2405 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2406 for ROTL indicate a rotate to the right. */
2409 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2414 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2418 return (64 - end >= rotl);
2421 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in DImode.  */
2423 rotl = -rotl + (64 - bitsize);
2424 return (start >= rotl);
2428 /* Check whether we can (and want to) split a double-word
2429 move in mode MODE from SRC to DST into two single-word
2430 moves, moving the subword FIRST_SUBWORD first. */
2433 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2435 /* Floating point and vector registers cannot be split. */
2436 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2439 /* Non-offsettable memory references cannot be split. */
2440 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2441 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2444 /* Moving the first subword must not clobber a register
2445 needed to move the second subword. */
2446 if (register_operand (dst, mode))
2448 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2449 if (reg_overlap_mentioned_p (subreg, src))
2456 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2457 and [MEM2, MEM2 + SIZE] do overlap and false otherwise.  */
2461 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2463 rtx addr1, addr2, addr_delta;
2464 HOST_WIDE_INT delta;
2466 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2472 addr1 = XEXP (mem1, 0);
2473 addr2 = XEXP (mem2, 0);
2475 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2477 /* This overlapping check is used by peepholes merging memory block operations.
2478 Overlapping operations would otherwise be recognized by the S/390 hardware
2479 and would fall back to a slower implementation. Allowing overlapping
2480 operations would lead to slow code but not to wrong code. Therefore we are
2481 somewhat optimistic if we cannot prove that the memory blocks are really distinct.
2483 That's why we return false here although this may accept operations on
2484 overlapping memory areas.  */
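/* For example, MEM1 = (mem (reg 1)), MEM2 = (mem (plus (reg 1)
(const_int 8))) and SIZE = 16 give a delta of 8 and the overlap is
reported.  With unrelated base registers no constant delta can be
computed and false is returned.  */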
2485 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2488 delta = INTVAL (addr_delta);
2491 || (delta > 0 && delta < size)
2492 || (delta < 0 && -delta < size))
2498 /* Check whether the address of memory reference MEM2 equals exactly
2499 the address of memory reference MEM1 plus DELTA. Return true if
2500 we can prove this to be the case, false otherwise. */
2503 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2505 rtx addr1, addr2, addr_delta;
2507 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2510 addr1 = XEXP (mem1, 0);
2511 addr2 = XEXP (mem2, 0);
2513 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2514 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2520 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2523 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2526 machine_mode wmode = mode;
2527 rtx dst = operands[0];
2528 rtx src1 = operands[1];
2529 rtx src2 = operands[2];
2532 /* If we cannot handle the operation directly, use a temp register. */
2533 if (!s390_logical_operator_ok_p (operands))
2534 dst = gen_reg_rtx (mode);
2536 /* QImode and HImode patterns make sense only if we have a destination
2537 in memory. Otherwise perform the operation in SImode. */
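/* For instance, an AND of two QImode registers is widened below to an
SImode AND on paired subregs, and the QImode lowpart of the result
is copied back to the original destination at the end.  */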
2538 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2541 /* Widen operands if required. */
2544 if (GET_CODE (dst) == SUBREG
2545 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2547 else if (REG_P (dst))
2548 dst = gen_rtx_SUBREG (wmode, dst, 0);
2550 dst = gen_reg_rtx (wmode);
2552 if (GET_CODE (src1) == SUBREG
2553 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2555 else if (GET_MODE (src1) != VOIDmode)
2556 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2558 if (GET_CODE (src2) == SUBREG
2559 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2561 else if (GET_MODE (src2) != VOIDmode)
2562 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2565 /* Emit the instruction. */
2566 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2567 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2568 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2570 /* Fix up the destination if needed. */
2571 if (dst != operands[0])
2572 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2575 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2578 s390_logical_operator_ok_p (rtx *operands)
2580 /* If the destination operand is in memory, it needs to coincide
2581 with one of the source operands. After reload, it has to be
2582 the first source operand. */
2583 if (GET_CODE (operands[0]) == MEM)
2584 return rtx_equal_p (operands[0], operands[1])
2585 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2590 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2591 operand IMMOP to switch from SS to SI type instructions. */
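/* For example, an HImode AND with immediate 0xfff0 only changes the
low byte (the high mask byte is all ones, the AND default), so MEMOP
is narrowed to the QImode byte at offset 1 -- assuming parts are
counted from the most significant byte -- and IMMOP becomes 0xf0,
turning an NC into an NI.  */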
2594 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2596 int def = code == AND ? -1 : 0;
2600 gcc_assert (GET_CODE (*memop) == MEM);
2601 gcc_assert (!MEM_VOLATILE_P (*memop));
2603 mask = s390_extract_part (*immop, QImode, def);
2604 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2605 gcc_assert (part >= 0);
2607 *memop = adjust_address (*memop, QImode, part);
2608 *immop = gen_int_mode (mask, QImode);
2612 /* How to allocate a 'struct machine_function'. */
2614 static struct machine_function *
2615 s390_init_machine_status (void)
2617 return ggc_cleared_alloc<machine_function> ();
2620 /* Map for smallest class containing reg regno. */
2622 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2623 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2624 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2625 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2626 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2627 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2628 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2629 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2630 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2631 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2632 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2633 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2634 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2635 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2636 VEC_REGS, VEC_REGS /* 52 */
2639 /* Return attribute type of insn. */
2641 static enum attr_type
2642 s390_safe_attr_type (rtx_insn *insn)
2644 if (recog_memoized (insn) >= 0)
2645 return get_attr_type (insn);
2650 /* Return true if DISP is a valid short displacement. */
2653 s390_short_displacement (rtx disp)
2655 /* No displacement is OK. */
2659 /* Without the long displacement facility we don't need to
2660 distinguish between long and short displacements.
2661 if (!TARGET_LONG_DISPLACEMENT)
2664 /* Integer displacement in range. */
2665 if (GET_CODE (disp) == CONST_INT)
2666 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2668 /* GOT offset is not OK, the GOT can be large. */
2669 if (GET_CODE (disp) == CONST
2670 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2671 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2672 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2675 /* All other symbolic constants are literal pool references,
2676 which are OK as the literal pool must be small. */
2677 if (GET_CODE (disp) == CONST)
2683 /* Decompose a RTL expression ADDR for a memory address into
2684 its components, returned in OUT.
2686 Returns false if ADDR is not a valid memory address, true
2687 otherwise. If OUT is NULL, don't return the components,
2688 but check for validity only.
2690 Note: Only addresses in canonical form are recognized.
2691 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2692 canonical form so that they will be recognized. */
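/* For example, (plus (plus (reg:DI 2) (reg:DI 3)) (const_int 100))
decomposes into index (reg 2), base (reg 3) and displacement 100,
matching the operand order handled in the PLUS case below.  */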
2695 s390_decompose_address (rtx addr, struct s390_address *out)
2697 HOST_WIDE_INT offset = 0;
2698 rtx base = NULL_RTX;
2699 rtx indx = NULL_RTX;
2700 rtx disp = NULL_RTX;
2702 bool pointer = false;
2703 bool base_ptr = false;
2704 bool indx_ptr = false;
2705 bool literal_pool = false;
2707 /* We may need to substitute the literal pool base register into the address
2708 below. However, at this point we do not know which register is going to
2709 be used as base, so we substitute the arg pointer register. This is going
2710 to be treated as holding a pointer below -- it shouldn't be used for any other purpose.  */
2712 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2714 /* Decompose address into base + index + displacement. */
2716 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2719 else if (GET_CODE (addr) == PLUS)
2721 rtx op0 = XEXP (addr, 0);
2722 rtx op1 = XEXP (addr, 1);
2723 enum rtx_code code0 = GET_CODE (op0);
2724 enum rtx_code code1 = GET_CODE (op1);
2726 if (code0 == REG || code0 == UNSPEC)
2728 if (code1 == REG || code1 == UNSPEC)
2730 indx = op0; /* index + base */
2736 base = op0; /* base + displacement */
2741 else if (code0 == PLUS)
2743 indx = XEXP (op0, 0); /* index + base + disp */
2744 base = XEXP (op0, 1);
2755 disp = addr; /* displacement */
2757 /* Extract integer part of displacement. */
2761 if (GET_CODE (disp) == CONST_INT)
2763 offset = INTVAL (disp);
2766 else if (GET_CODE (disp) == CONST
2767 && GET_CODE (XEXP (disp, 0)) == PLUS
2768 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2770 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2771 disp = XEXP (XEXP (disp, 0), 0);
2775 /* Strip off CONST here to avoid special case tests later. */
2776 if (disp && GET_CODE (disp) == CONST)
2777 disp = XEXP (disp, 0);
2779 /* We can convert literal pool addresses to
2780 displacements by basing them off the base register. */
2781 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2786 base = fake_pool_base, literal_pool = true;
2788 /* Mark up the displacement. */
2789 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2790 UNSPEC_LTREL_OFFSET);
2793 /* Validate base register. */
2796 if (GET_CODE (base) == UNSPEC)
2797 switch (XINT (base, 1))
2801 disp = gen_rtx_UNSPEC (Pmode,
2802 gen_rtvec (1, XVECEXP (base, 0, 0)),
2803 UNSPEC_LTREL_OFFSET);
2807 base = XVECEXP (base, 0, 1);
2810 case UNSPEC_LTREL_BASE:
2811 if (XVECLEN (base, 0) == 1)
2812 base = fake_pool_base, literal_pool = true;
2814 base = XVECEXP (base, 0, 1);
2821 if (!REG_P (base) || GET_MODE (base) != Pmode)
2824 if (REGNO (base) == STACK_POINTER_REGNUM
2825 || REGNO (base) == FRAME_POINTER_REGNUM
2826 || ((reload_completed || reload_in_progress)
2827 && frame_pointer_needed
2828 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2829 || REGNO (base) == ARG_POINTER_REGNUM
2831 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2832 pointer = base_ptr = true;
2834 if ((reload_completed || reload_in_progress)
2835 && base == cfun->machine->base_reg)
2836 pointer = base_ptr = literal_pool = true;
2839 /* Validate index register. */
2842 if (GET_CODE (indx) == UNSPEC)
2843 switch (XINT (indx, 1))
2847 disp = gen_rtx_UNSPEC (Pmode,
2848 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2849 UNSPEC_LTREL_OFFSET);
2853 indx = XVECEXP (indx, 0, 1);
2856 case UNSPEC_LTREL_BASE:
2857 if (XVECLEN (indx, 0) == 1)
2858 indx = fake_pool_base, literal_pool = true;
2860 indx = XVECEXP (indx, 0, 1);
2867 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2870 if (REGNO (indx) == STACK_POINTER_REGNUM
2871 || REGNO (indx) == FRAME_POINTER_REGNUM
2872 || ((reload_completed || reload_in_progress)
2873 && frame_pointer_needed
2874 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2875 || REGNO (indx) == ARG_POINTER_REGNUM
2877 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2878 pointer = indx_ptr = true;
2880 if ((reload_completed || reload_in_progress)
2881 && indx == cfun->machine->base_reg)
2882 pointer = indx_ptr = literal_pool = true;
2885 /* Prefer to use pointer as base, not index. */
2886 if (base && indx && !base_ptr
2887 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2894 /* Validate displacement. */
2897 /* If virtual registers are involved, the displacement will change later
2898 anyway as the virtual registers get eliminated. This could make a
2899 valid displacement invalid, but it is more likely to make an invalid
2900 displacement valid, because we sometimes access the register save area
2901 via negative offsets to one of those registers.
2902 Thus we don't check the displacement for validity here. If after
2903 elimination the displacement turns out to be invalid after all,
2904 this is fixed up by reload in any case. */
2905 /* LRA always keeps displacements up to date and needs to know
2906 that the displacement is valid during all of LRA, not only at the
2907 final elimination.  */
2909 || (base != arg_pointer_rtx
2910 && indx != arg_pointer_rtx
2911 && base != return_address_pointer_rtx
2912 && indx != return_address_pointer_rtx
2913 && base != frame_pointer_rtx
2914 && indx != frame_pointer_rtx
2915 && base != virtual_stack_vars_rtx
2916 && indx != virtual_stack_vars_rtx))
2917 if (!DISP_IN_RANGE (offset))
2922 /* All the special cases are pointers. */
2925 /* In the small-PIC case, the linker converts @GOT
2926 and @GOTNTPOFF offsets to possible displacements. */
2927 if (GET_CODE (disp) == UNSPEC
2928 && (XINT (disp, 1) == UNSPEC_GOT
2929 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2935 /* Accept pool label offsets. */
2936 else if (GET_CODE (disp) == UNSPEC
2937 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2940 /* Accept literal pool references. */
2941 else if (GET_CODE (disp) == UNSPEC
2942 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2944 /* In case CSE pulled a non-literal-pool reference out of
2945 the pool we have to reject the address. This is
2946 especially important when loading the GOT pointer on non
2947 zarch CPUs. In this case the literal pool contains an lt
2948 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2949 will most likely exceed the displacement. */
2950 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2951 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2954 orig_disp = gen_rtx_CONST (Pmode, disp);
2957 /* If we have an offset, make sure it does not
2958 exceed the size of the constant pool entry. */
2959 rtx sym = XVECEXP (disp, 0, 0);
2960 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2963 orig_disp = plus_constant (Pmode, orig_disp, offset);
2978 out->disp = orig_disp;
2979 out->pointer = pointer;
2980 out->literal_pool = literal_pool;
2986 /* Decompose a RTL expression OP for an address style operand into its
2987 components, and return the base register in BASE and the offset in
2988 OFFSET.  While OP looks like an address it is never supposed to be used as such.
2991 Return true if OP is a valid address operand, false if not. */
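/* For example, (plus (reg 2) (const_int 7)) yields *BASE = (reg 2)
and *OFFSET = 7.  Such operands describe shift counts like the
7(%r2) in "sll %r1,7(%r2)" rather than memory locations.  */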
2994 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
2995 HOST_WIDE_INT *offset)
2999 /* We can have an integer constant, an address register,
3000 or a sum of the two. */
3001 if (CONST_SCALAR_INT_P (op))
3006 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3011 while (op && GET_CODE (op) == SUBREG)
3012 op = SUBREG_REG (op);
3014 if (op && GET_CODE (op) != REG)
3019 if (off == NULL_RTX)
3021 else if (CONST_INT_P (off))
3022 *offset = INTVAL (off);
3023 else if (CONST_WIDE_INT_P (off))
3024 /* The offset will be truncated to 12 bits anyway, so just take
3025 the lowest order chunk of the wide int.  */
3026 *offset = CONST_WIDE_INT_ELT (off, 0);
3037 /* Return true if OP is a valid address without index.  */
3040 s390_legitimate_address_without_index_p (rtx op)
3042 struct s390_address addr;
3044 if (!s390_decompose_address (XEXP (op, 0), &addr))
3053 /* Return TRUE if ADDR is an operand valid for a load/store relative
3054 instruction. Be aware that the alignment of the operand needs to
3055 be checked separately.
3056 Valid addresses are single references or a sum of a reference and a
3057 constant integer. Return these parts in SYMREF and ADDEND. You can
3058 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3059 values.  Literal pool references are *not* considered symbol references.  */
3063 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3065 HOST_WIDE_INT tmpaddend = 0;
3067 if (GET_CODE (addr) == CONST)
3068 addr = XEXP (addr, 0);
3070 if (GET_CODE (addr) == PLUS)
3072 if (!CONST_INT_P (XEXP (addr, 1)))
3075 tmpaddend = INTVAL (XEXP (addr, 1));
3076 addr = XEXP (addr, 0);
3079 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3080 || (GET_CODE (addr) == UNSPEC
3081 && (XINT (addr, 1) == UNSPEC_GOTENT
3082 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3087 *addend = tmpaddend;
3094 /* Return true if the address in OP is valid for constraint letter C
3095 if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3096 pool MEMs should be accepted. Only the Q, R, S, T constraint
3097 letters are allowed for C. */
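/* The four letters encode the address forms checked below:
'Q' = no index, short displacement; 'R' = index, short displacement;
'S' = no index, long displacement; 'T' = index, long displacement.  */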
3100 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3102 struct s390_address addr;
3103 bool decomposed = false;
3105 /* This check makes sure that no symbolic addresses (except literal
3106 pool references) are accepted by the R or T constraints. */
3107 if (s390_loadrelative_operand_p (op, NULL, NULL))
3110 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3113 if (!s390_decompose_address (op, &addr))
3115 if (addr.literal_pool)
3120 /* With reload, we sometimes get intermediate address forms that are
3121 actually invalid as-is, but we need to accept them in the most
3122 generic cases below ('R' or 'T'), since reload will in fact fix
3123 them up. LRA behaves differently here; we never see such forms,
3124 but on the other hand, we need to strictly reject every invalid
3125 address form. Perform this check right up front. */
3126 if (lra_in_progress)
3128 if (!decomposed && !s390_decompose_address (op, &addr))
3135 case 'Q': /* no index short displacement */
3136 if (!decomposed && !s390_decompose_address (op, &addr))
3140 if (!s390_short_displacement (addr.disp))
3144 case 'R': /* with index short displacement */
3145 if (TARGET_LONG_DISPLACEMENT)
3147 if (!decomposed && !s390_decompose_address (op, &addr))
3149 if (!s390_short_displacement (addr.disp))
3152 /* Any invalid address here will be fixed up by reload,
3153 so accept it for the most generic constraint. */
3156 case 'S': /* no index long displacement */
3157 if (!decomposed && !s390_decompose_address (op, &addr))
3163 case 'T': /* with index long displacement */
3164 /* Any invalid address here will be fixed up by reload,
3165 so accept it for the most generic constraint. */
3175 /* Evaluates constraint strings described by the regular expression
3176 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3177 the constraint given in STR, or 0 otherwise.  */
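/* For example, "AQ" accepts an offsettable MEM whose address
satisfies 'Q', the "B" variants additionally reject literal pool
references, the "Z" variants apply the QRST check to a bare address,
and 'Y' accepts an address style shift count operand.  */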
3180 s390_mem_constraint (const char *str, rtx op)
3187 /* Check for offsettable variants of memory constraints. */
3188 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3190 if ((reload_completed || reload_in_progress)
3191 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3193 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3195 /* Check for non-literal-pool variants of memory constraints. */
3198 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3203 if (GET_CODE (op) != MEM)
3205 return s390_check_qrst_address (c, XEXP (op, 0), true);
3207 /* Simply check for the basic form of a shift count. Reload will
3208 take care of making sure we have a proper base register. */
3209 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3213 return s390_check_qrst_address (str[1], op, true);
3221 /* Evaluates constraint strings starting with letter O. Input
3222 parameter C is the letter following the "O", i.e. the second letter
3223 of the constraint string.  Returns 1 if VALUE meets the respective constraint and 0 otherwise.  */
3227 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3235 return trunc_int_for_mode (value, SImode) == value;
3239 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3242 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3250 /* Evaluates constraint strings starting with letter N. Parameter STR
3251 contains the letters following letter "N" in the constraint string.
3252 Returns true if VALUE matches the constraint. */
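/* A sketch of the format, assuming the usual reading of the four
parameter letters (part number or 'x', part mode, operand mode,
required value of the remaining parts): "N0HD0" would match a DImode
value whose most significant HImode part is its only nonzero part,
e.g. 0x1234000000000000.  */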
3255 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3257 machine_mode mode, part_mode;
3259 int part, part_goal;
3265 part_goal = str[0] - '0';
3309 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3312 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3315 if (part_goal != -1 && part_goal != part)
3322 /* Returns true if the input parameter VALUE is a float zero. */
3325 s390_float_const_zero_p (rtx value)
3327 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3328 && value == CONST0_RTX (GET_MODE (value)));
3331 /* Implement TARGET_REGISTER_MOVE_COST. */
3334 s390_register_move_cost (machine_mode mode,
3335 reg_class_t from, reg_class_t to)
3337 /* On s390, copying between FPRs and GPRs is expensive.  */
3339 /* It becomes somewhat faster once ldgr/lgdr are available.  */
3340 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3342 /* ldgr is single cycle. */
3343 if (reg_classes_intersect_p (from, GENERAL_REGS)
3344 && reg_classes_intersect_p (to, FP_REGS))
3346 /* lgdr needs 3 cycles. */
3347 if (reg_classes_intersect_p (to, GENERAL_REGS)
3348 && reg_classes_intersect_p (from, FP_REGS))
3352 /* Otherwise copying is done via memory. */
3353 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3354 && reg_classes_intersect_p (to, FP_REGS))
3355 || (reg_classes_intersect_p (from, FP_REGS)
3356 && reg_classes_intersect_p (to, GENERAL_REGS)))
3362 /* Implement TARGET_MEMORY_MOVE_COST. */
3365 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3366 reg_class_t rclass ATTRIBUTE_UNUSED,
3367 bool in ATTRIBUTE_UNUSED)
3372 /* Compute a (partial) cost for rtx X. Return true if the complete
3373 cost has been computed, and false if subexpressions should be
3374 scanned. In either case, *TOTAL contains the cost result. The
3375 initial value of *TOTAL is the default value computed by
3376 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3377 code of the superexpression of x. */
3380 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3381 int opno ATTRIBUTE_UNUSED,
3382 int *total, bool speed ATTRIBUTE_UNUSED)
3384 int code = GET_CODE (x);
3392 case CONST_WIDE_INT:
3399 if (GET_CODE (XEXP (x, 0)) == AND
3400 && GET_CODE (XEXP (x, 1)) == ASHIFT
3401 && REG_P (XEXP (XEXP (x, 0), 0))
3402 && REG_P (XEXP (XEXP (x, 1), 0))
3403 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3404 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3405 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3406 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3408 *total = COSTS_N_INSNS (2);
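/* The condition above matches e.g. (ior (and (reg a) (const_int 255))
(ashift (reg b) (const_int 8))): the AND mask equals
(1 << shift count) - 1, so the expression merges two bit ranges and
can be done with a rotate-and-insert style instruction (risbg),
hence the cost of two instructions.  */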
3412 /* ~AND on a 128 bit mode.  This can be done using a vector instruction.  */
3415 && GET_CODE (XEXP (x, 0)) == NOT
3416 && GET_CODE (XEXP (x, 1)) == NOT
3417 && REG_P (XEXP (XEXP (x, 0), 0))
3418 && REG_P (XEXP (XEXP (x, 1), 0))
3419 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3420 && s390_hard_regno_mode_ok (VR0_REGNUM,
3421 GET_MODE (XEXP (XEXP (x, 0), 0))))
3423 *total = COSTS_N_INSNS (1);
3436 *total = COSTS_N_INSNS (1);
3441 *total = COSTS_N_INSNS (1);
3449 rtx left = XEXP (x, 0);
3450 rtx right = XEXP (x, 1);
3451 if (GET_CODE (right) == CONST_INT
3452 && CONST_OK_FOR_K (INTVAL (right)))
3453 *total = s390_cost->mhi;
3454 else if (GET_CODE (left) == SIGN_EXTEND)
3455 *total = s390_cost->mh;
3457 *total = s390_cost->ms; /* msr, ms, msy */
3462 rtx left = XEXP (x, 0);
3463 rtx right = XEXP (x, 1);
3466 if (GET_CODE (right) == CONST_INT
3467 && CONST_OK_FOR_K (INTVAL (right)))
3468 *total = s390_cost->mghi;
3469 else if (GET_CODE (left) == SIGN_EXTEND)
3470 *total = s390_cost->msgf;
3472 *total = s390_cost->msg; /* msgr, msg */
3474 else /* TARGET_31BIT */
3476 if (GET_CODE (left) == SIGN_EXTEND
3477 && GET_CODE (right) == SIGN_EXTEND)
3478 /* mulsidi case: mr, m */
3479 *total = s390_cost->m;
3480 else if (GET_CODE (left) == ZERO_EXTEND
3481 && GET_CODE (right) == ZERO_EXTEND
3482 && TARGET_CPU_ZARCH)
3483 /* umulsidi case: ml, mlr */
3484 *total = s390_cost->ml;
3486 /* Complex calculation is required. */
3487 *total = COSTS_N_INSNS (40);
3493 *total = s390_cost->mult_df;
3496 *total = s390_cost->mxbr;
3507 *total = s390_cost->madbr;
3510 *total = s390_cost->maebr;
3515 /* Negation of the third argument is free: FMSUB.  */
3516 if (GET_CODE (XEXP (x, 2)) == NEG)
3518 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3519 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3520 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3527 if (mode == TImode) /* 128 bit division */
3528 *total = s390_cost->dlgr;
3529 else if (mode == DImode)
3531 rtx right = XEXP (x, 1);
3532 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3533 *total = s390_cost->dlr;
3534 else /* 64 by 64 bit division */
3535 *total = s390_cost->dlgr;
3537 else if (mode == SImode) /* 32 bit division */
3538 *total = s390_cost->dlr;
3545 rtx right = XEXP (x, 1);
3546 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3548 *total = s390_cost->dsgfr;
3550 *total = s390_cost->dr;
3551 else /* 64 by 64 bit division */
3552 *total = s390_cost->dsgr;
3554 else if (mode == SImode) /* 32 bit division */
3555 *total = s390_cost->dlr;
3556 else if (mode == SFmode)
3558 *total = s390_cost->debr;
3560 else if (mode == DFmode)
3562 *total = s390_cost->ddbr;
3564 else if (mode == TFmode)
3566 *total = s390_cost->dxbr;
3572 *total = s390_cost->sqebr;
3573 else if (mode == DFmode)
3574 *total = s390_cost->sqdbr;
3576 *total = s390_cost->sqxbr;
3581 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3582 || outer_code == PLUS || outer_code == MINUS
3583 || outer_code == COMPARE)
3588 *total = COSTS_N_INSNS (1);
3589 if (GET_CODE (XEXP (x, 0)) == AND
3590 && GET_CODE (XEXP (x, 1)) == CONST_INT
3591 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3593 rtx op0 = XEXP (XEXP (x, 0), 0);
3594 rtx op1 = XEXP (XEXP (x, 0), 1);
3595 rtx op2 = XEXP (x, 1);
3597 if (memory_operand (op0, GET_MODE (op0))
3598 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3600 if (register_operand (op0, GET_MODE (op0))
3601 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3611 /* Return the cost of an address rtx ADDR. */
3614 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3615 addr_space_t as ATTRIBUTE_UNUSED,
3616 bool speed ATTRIBUTE_UNUSED)
3618 struct s390_address ad;
3619 if (!s390_decompose_address (addr, &ad))
3622 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3625 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3627 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3629 int misalign ATTRIBUTE_UNUSED)
3631 switch (type_of_cost)
3641 case cond_branch_not_taken:
3643 case vec_promote_demote:
3644 case unaligned_load:
3645 case unaligned_store:
3648 case cond_branch_taken:
3652 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3659 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3660 otherwise return 0. */
3663 tls_symbolic_operand (rtx op)
3665 if (GET_CODE (op) != SYMBOL_REF)
3667 return SYMBOL_REF_TLS_MODEL (op);
3670 /* Split DImode access register reference REG (on 64-bit) into its constituent
3671 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3672 gen_highpart cannot be used as they assume all registers are word-sized,
3673 while our access registers have only half that size. */
3676 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3678 gcc_assert (TARGET_64BIT);
3679 gcc_assert (ACCESS_REG_P (reg));
3680 gcc_assert (GET_MODE (reg) == DImode);
3681 gcc_assert (!(REGNO (reg) & 1));
3683 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3684 *hi = gen_rtx_REG (SImode, REGNO (reg));
3687 /* Return true if OP contains a symbol reference.  */
3690 symbolic_reference_mentioned_p (rtx op)
3695 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3698 fmt = GET_RTX_FORMAT (GET_CODE (op));
3699 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3705 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3706 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3710 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3717 /* Return true if OP contains a reference to a thread-local symbol. */
3720 tls_symbolic_reference_mentioned_p (rtx op)
3725 if (GET_CODE (op) == SYMBOL_REF)
3726 return tls_symbolic_operand (op);
3728 fmt = GET_RTX_FORMAT (GET_CODE (op));
3729 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3735 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3736 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3740 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3748 /* Return true if OP is a legitimate general operand when
3749 generating PIC code. It is given that flag_pic is on
3750 and that OP satisfies CONSTANT_P. */
3753 legitimate_pic_operand_p (rtx op)
3755 /* Accept all non-symbolic constants. */
3756 if (!SYMBOLIC_CONST (op))
3759 /* Reject everything else; must be handled
3760 via emit_symbolic_move. */
3764 /* Returns true if the constant value OP is a legitimate general operand.
3765 It is given that OP satisfies CONSTANT_P. */
3768 s390_legitimate_constant_p (machine_mode mode, rtx op)
3770 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3772 if (GET_MODE_SIZE (mode) != 16)
3775 if (!satisfies_constraint_j00 (op)
3776 && !satisfies_constraint_jm1 (op)
3777 && !satisfies_constraint_jKK (op)
3778 && !satisfies_constraint_jxx (op)
3779 && !satisfies_constraint_jyy (op))
3783 /* Accept all non-symbolic constants. */
3784 if (!SYMBOLIC_CONST (op))
3787 /* Accept immediate LARL operands. */
3788 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3791 /* Thread-local symbols are never legal constants. This is
3792 so that emit_call knows that computing such addresses
3793 might require a function call. */
3794 if (TLS_SYMBOLIC_CONST (op))
3797 /* In the PIC case, symbolic constants must *not* be
3798 forced into the literal pool. We accept them here,
3799 so that they will be handled by emit_symbolic_move. */
3803 /* All remaining non-PIC symbolic constants are
3804 forced into the literal pool. */
3808 /* Determine if it's legal to put X into the constant pool. This
3809 is not possible if X contains the address of a symbol that is
3810 not constant (TLS) or not known at final link time (PIC). */
3813 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3815 switch (GET_CODE (x))
3819 case CONST_WIDE_INT:
3821 /* Accept all non-symbolic constants. */
3825 /* Labels are OK iff we are non-PIC. */
3826 return flag_pic != 0;
3829 /* 'Naked' TLS symbol references are never OK,
3830 non-TLS symbols are OK iff we are non-PIC. */
3831 if (tls_symbolic_operand (x))
3834 return flag_pic != 0;
3837 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3840 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3841 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3844 switch (XINT (x, 1))
3846 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3847 case UNSPEC_LTREL_OFFSET:
3855 case UNSPEC_GOTNTPOFF:
3856 case UNSPEC_INDNTPOFF:
3859 /* If the literal pool shares the code section, execute template
3860 placeholders may be put into the pool as well.  */
3862 return TARGET_CPU_ZARCH;
3874 /* Returns true if the constant value OP is a legitimate general
3875 operand during and after reload. The difference to
3876 legitimate_constant_p is that this function will not accept
3877 a constant that would need to be forced to the literal pool
3878 before it can be used as operand.
3879 This function accepts all constants which can be loaded directly into general purpose registers.  */
3883 legitimate_reload_constant_p (rtx op)
3885 /* Accept la(y) operands. */
3886 if (GET_CODE (op) == CONST_INT
3887 && DISP_IN_RANGE (INTVAL (op)))
3890 /* Accept l(g)hi/l(g)fi operands. */
3891 if (GET_CODE (op) == CONST_INT
3892 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3895 /* Accept lliXX operands. */
3897 && GET_CODE (op) == CONST_INT
3898 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3899 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3903 && GET_CODE (op) == CONST_INT
3904 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3905 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3908 /* Accept larl operands. */
3909 if (TARGET_CPU_ZARCH
3910 && larl_operand (op, VOIDmode))
3913 /* Accept floating-point zero operands that fit into a single GPR. */
3914 if (GET_CODE (op) == CONST_DOUBLE
3915 && s390_float_const_zero_p (op)
3916 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3919 /* Accept double-word operands that can be split. */
3920 if (GET_CODE (op) == CONST_WIDE_INT
3921 || (GET_CODE (op) == CONST_INT
3922 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3924 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3925 rtx hi = operand_subword (op, 0, 0, dword_mode);
3926 rtx lo = operand_subword (op, 1, 0, dword_mode);
3927 return legitimate_reload_constant_p (hi)
3928 && legitimate_reload_constant_p (lo);
3931 /* Everything else cannot be handled without reload. */
3935 /* Returns true if the constant value OP is a legitimate fp operand
3936 during and after reload.
3937 This function accepts all constants which can be loaded directly into floating point registers.  */
3941 legitimate_reload_fp_constant_p (rtx op)
3943 /* Accept floating-point zero operands if the load zero instruction
3944 can be used. Prior to z196 the load fp zero instruction caused a
3945 performance penalty if the result is used as BFP number. */
3947 && GET_CODE (op) == CONST_DOUBLE
3948 && s390_float_const_zero_p (op))
3954 /* Returns true if the constant value OP is a legitimate vector operand
3955 during and after reload.
3956 This function accepts all constants which can be loaded directly into vector registers.  */
3960 legitimate_reload_vector_constant_p (rtx op)
3962 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3963 && (satisfies_constraint_j00 (op)
3964 || satisfies_constraint_jm1 (op)
3965 || satisfies_constraint_jKK (op)
3966 || satisfies_constraint_jxx (op)
3967 || satisfies_constraint_jyy (op)))
3973 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3974 return the class of reg to actually use. */
3977 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3979 switch (GET_CODE (op))
3981 /* Constants we cannot reload into general registers
3982 must be forced into the literal pool. */
3986 case CONST_WIDE_INT:
3987 if (reg_class_subset_p (GENERAL_REGS, rclass)
3988 && legitimate_reload_constant_p (op))
3989 return GENERAL_REGS;
3990 else if (reg_class_subset_p (ADDR_REGS, rclass)
3991 && legitimate_reload_constant_p (op))
3993 else if (reg_class_subset_p (FP_REGS, rclass)
3994 && legitimate_reload_fp_constant_p (op))
3996 else if (reg_class_subset_p (VEC_REGS, rclass)
3997 && legitimate_reload_vector_constant_p (op))
4002 /* If a symbolic constant or a PLUS is reloaded,
4003 it is most likely being used as an address, so
4004 prefer ADDR_REGS.  If RCLASS is not a superset
4005 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4007 /* Symrefs cannot be pushed into the literal pool with -fPIC
4008 so we *MUST NOT* return NO_REGS for these cases
4009 (s390_cannot_force_const_mem will return true).
4011 On the other hand we MUST return NO_REGS for symrefs with
4012 invalid addend which might have been pushed to the literal
4013 pool (no -fPIC). Usually we would expect them to be
4014 handled via secondary reload but this does not happen if
4015 they are used as literal pool slot replacement in reload
4016 inheritance (see emit_input_reload_insns). */
4017 if (TARGET_CPU_ZARCH
4018 && GET_CODE (XEXP (op, 0)) == PLUS
4019 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4020 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4022 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4030 if (!legitimate_reload_constant_p (op))
4034 /* load address will be used. */
4035 if (reg_class_subset_p (ADDR_REGS, rclass))
4047 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4048 multiple of ALIGNMENT and the SYMBOL_REF being naturally aligned.  */
4052 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4054 HOST_WIDE_INT addend;
4057 /* The "required alignment" might be 0 (e.g. for certain structs
4058 accessed via BLKmode). Early abort in this case, as well as when
4059 an alignment > 8 is required. */
4060 if (alignment < 2 || alignment > 8)
4063 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4066 if (addend & (alignment - 1))
4069 if (GET_CODE (symref) == SYMBOL_REF)
4071 /* We have load-relative instructions for 2-byte, 4-byte, and
4072 8-byte alignment so allow only these. */
4075 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4076 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4077 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4078 default: return false;
4082 if (GET_CODE (symref) == UNSPEC
4083 && alignment <= UNITS_PER_LONG)
4089 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4090 operand SCRATCH is used to reload the even part of the address and the odd offset of one is added afterwards.  */
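/* For example, sym+5 is not a valid larl operand (odd addend).  The
code below then emits larl scratch,sym+4 followed by la reg,1(scratch),
producing sym+5 without clobbering the condition code.  */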
4094 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4096 HOST_WIDE_INT addend;
4099 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4103 /* Easy case. The addend is even so larl will do fine. */
4104 emit_move_insn (reg, addr);
4107 /* We can leave the scratch register untouched if the target
4108 register is a valid base register. */
4109 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4110 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4113 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4114 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4117 emit_move_insn (scratch,
4118 gen_rtx_CONST (Pmode,
4119 gen_rtx_PLUS (Pmode, symref,
4120 GEN_INT (addend - 1))));
4122 emit_move_insn (scratch, symref);
4124 /* Increment the address using la in order to avoid clobbering cc. */
4125 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4129 /* Generate what is necessary to move between REG and MEM using
4130 SCRATCH. The direction is given by TOMEM. */
4133 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4135 /* Reload might have pulled a constant out of the literal pool.
4136 Force it back in. */
4137 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4138 || GET_CODE (mem) == CONST_WIDE_INT
4139 || GET_CODE (mem) == CONST_VECTOR
4140 || GET_CODE (mem) == CONST)
4141 mem = force_const_mem (GET_MODE (reg), mem);
4143 gcc_assert (MEM_P (mem));
4145 /* For a load from memory we can leave the scratch register
4146 untouched if the target register is a valid base register. */
4148 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4149 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4150 && GET_MODE (reg) == GET_MODE (scratch))
4153 /* Load address into scratch register. Since we can't have a
4154 secondary reload for a secondary reload we have to cover the case
4155 where larl would need a secondary reload here as well. */
4156 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4158 /* Now we can use a standard load/store to do the move. */
4160 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4162 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4165 /* Inform reload about cases where moving X with a mode MODE to a register in
4166 RCLASS requires an extra scratch or immediate register. Return the class
4167 needed for the immediate register. */
4170 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4171 machine_mode mode, secondary_reload_info *sri)
4173 enum reg_class rclass = (enum reg_class) rclass_i;
4175 /* Intermediate register needed. */
4176 if (reg_classes_intersect_p (CC_REGS, rclass))
4177 return GENERAL_REGS;
4181 /* The vst/vl vector move instructions allow only for short displacements.  */
4184 && GET_CODE (XEXP (x, 0)) == PLUS
4185 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4186 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4187 && reg_class_subset_p (rclass, VEC_REGS)
4188 && (!reg_class_subset_p (rclass, FP_REGS)
4189 || (GET_MODE_SIZE (mode) > 8
4190 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4193 sri->icode = (TARGET_64BIT ?
4194 CODE_FOR_reloaddi_la_in :
4195 CODE_FOR_reloadsi_la_in);
4197 sri->icode = (TARGET_64BIT ?
4198 CODE_FOR_reloaddi_la_out :
4199 CODE_FOR_reloadsi_la_out);
4205 HOST_WIDE_INT offset;
4208 /* On z10 several optimizer steps may generate larl operands with an odd addend.  */
4211 && s390_loadrelative_operand_p (x, &symref, &offset)
4213 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4214 && (offset & 1) == 1)
4215 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4216 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4218 /* Handle all the (mem (symref)) accesses we cannot use the z10
4219 instructions for. */
4221 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4223 || !reg_class_subset_p (rclass, GENERAL_REGS)
4224 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4225 || !s390_check_symref_alignment (XEXP (x, 0),
4226 GET_MODE_SIZE (mode))))
4228 #define __SECONDARY_RELOAD_CASE(M,m) \
4231 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4232 CODE_FOR_reload##m##di_tomem_z10; \
4234 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4235 CODE_FOR_reload##m##si_tomem_z10; \
4238 switch (GET_MODE (x))
4240 __SECONDARY_RELOAD_CASE (QI, qi);
4241 __SECONDARY_RELOAD_CASE (HI, hi);
4242 __SECONDARY_RELOAD_CASE (SI, si);
4243 __SECONDARY_RELOAD_CASE (DI, di);
4244 __SECONDARY_RELOAD_CASE (TI, ti);
4245 __SECONDARY_RELOAD_CASE (SF, sf);
4246 __SECONDARY_RELOAD_CASE (DF, df);
4247 __SECONDARY_RELOAD_CASE (TF, tf);
4248 __SECONDARY_RELOAD_CASE (SD, sd);
4249 __SECONDARY_RELOAD_CASE (DD, dd);
4250 __SECONDARY_RELOAD_CASE (TD, td);
4251 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4252 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4253 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4254 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4255 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4256 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4257 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4258 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4259 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4260 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4261 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4262 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4263 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4264 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4265 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4266 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4267 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4268 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4269 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4270 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4271 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4275 #undef __SECONDARY_RELOAD_CASE
4279 /* We need a scratch register when loading a PLUS expression which
4280 is not a legitimate operand of the LOAD ADDRESS instruction. */
4281 /* LRA handles the transformation of a PLUS operand very well on its
4282 own -- so we don't need to prompt it in this case.  */
4283 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4284 sri->icode = (TARGET_64BIT ?
4285 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4287 /* When performing a multiword move from or to memory, we have to make sure the
4288 second chunk in memory is addressable without causing a displacement
4289 overflow. If that would be the case we calculate the address in
4290 a scratch register. */
4292 && GET_CODE (XEXP (x, 0)) == PLUS
4293 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4294 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4295 + GET_MODE_SIZE (mode) - 1))
4297 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4298 in an s_operand address since we may fall back to lm/stm.  So we only
4299 have to care about overflows in the b+i+d case. */
4300 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4301 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4303 /* For FP_REGS no lm/stm is available so this check is triggered
4304 for displacement overflows in b+i+d and b+d like addresses. */
4305 || (reg_classes_intersect_p (FP_REGS, rclass)
4306 && s390_class_max_nregs (FP_REGS, mode) > 1))
4309 sri->icode = (TARGET_64BIT ?
4310 CODE_FOR_reloaddi_la_in :
4311 CODE_FOR_reloadsi_la_in);
4313 sri->icode = (TARGET_64BIT ?
4314 CODE_FOR_reloaddi_la_out :
4315 CODE_FOR_reloadsi_la_out);
4319 /* A scratch address register is needed when a symbolic constant is
4320 copied to r0 when compiling with -fPIC.  In other cases the target
4321 register might be used as temporary (see legitimize_pic_address). */
4322 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4323 sri->icode = (TARGET_64BIT ?
4324 CODE_FOR_reloaddi_PIC_addr :
4325 CODE_FOR_reloadsi_PIC_addr);
4327 /* Either scratch or no register needed. */
4331 /* Generate code to load SRC, which is a PLUS that is not a
4332 legitimate operand for the LA instruction, into TARGET.
4333 SCRATCH may be used as scratch register. */
4336 s390_expand_plus_operand (rtx target, rtx src,
4340 struct s390_address ad;
4342 /* src must be a PLUS; get its two operands. */
4343 gcc_assert (GET_CODE (src) == PLUS);
4344 gcc_assert (GET_MODE (src) == Pmode);
4346 /* Check if any of the two operands is already scheduled
4347 for replacement by reload. This can happen e.g. when
4348 float registers occur in an address. */
4349 sum1 = find_replacement (&XEXP (src, 0));
4350 sum2 = find_replacement (&XEXP (src, 1));
4351 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4353 /* If the address is already strictly valid, there's nothing to do. */
4354 if (!s390_decompose_address (src, &ad)
4355 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4356 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4358 /* Otherwise, one of the operands cannot be an address register;
4359 we reload its value into the scratch register. */
4360 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4362 emit_move_insn (scratch, sum1);
4365 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4367 emit_move_insn (scratch, sum2);
4371 /* According to the way these invalid addresses are generated
4372 in reload.c, it should never happen (at least on s390) that
4373 *neither* of the PLUS components, after find_replacements
4374 was applied, is an address register. */
4375 if (sum1 == scratch && sum2 == scratch)
4381 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4384 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4385 is only ever performed on addresses, so we can mark the
4386 sum as legitimate for LA in any case. */
4387 s390_load_address (target, src);
4391 /* Return true if ADDR is a valid memory address.
4392 STRICT specifies whether strict register checking applies. */
4395 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4397 struct s390_address ad;
4400 && larl_operand (addr, VOIDmode)
4401 && (mode == VOIDmode
4402 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4405 if (!s390_decompose_address (addr, &ad))
4410 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4413 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4419 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4420 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4424 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4425 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4431 /* Return true if OP is a valid operand for the LA instruction.
4432 In 31-bit, we need to prove that the result is used as an
4433 address, as LA performs only a 31-bit addition. */
4436 legitimate_la_operand_p (rtx op)
4438 struct s390_address addr;
4439 if (!s390_decompose_address (op, &addr))
4442 return (TARGET_64BIT || addr.pointer);
4445 /* Return true if it is valid *and* preferable to use LA to
4446 compute the sum of OP1 and OP2. */
4449 preferred_la_operand_p (rtx op1, rtx op2)
4451 struct s390_address addr;
4453 if (op2 != const0_rtx)
4454 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4456 if (!s390_decompose_address (op1, &addr))
4458 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4460 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4463 /* Avoid LA instructions with index register on z196; it is
4464 preferable to use regular add instructions when possible.
4465 Starting with zEC12 the la with index register is "uncracked" again.  */
4467 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4470 if (!TARGET_64BIT && !addr.pointer)
4476 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4477 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4483 /* Emit a forced load-address operation to load SRC into DST.
4484 This will use the LOAD ADDRESS instruction even in situations
4485 where legitimate_la_operand_p (SRC) returns false. */
4488 s390_load_address (rtx dst, rtx src)
4491 emit_move_insn (dst, src);
4493 emit_insn (gen_force_la_31 (dst, src));
4496 /* Return a legitimate reference for ORIG (an address) using the
4497 register REG. If REG is 0, a new pseudo is generated.
4499 There are two types of references that must be handled:
4501 1. Global data references must load the address from the GOT, via
4502 the PIC reg. An insn is emitted to do this load, and the reg is
4505 2. Static data references, constant pool addresses, and code labels
4506 compute the address as an offset from the GOT, whose base is in
4507 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4508 differentiate them from global data objects. The returned
4509 address is the PIC reg + an unspec constant.
4511 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4512 reg also appears in the address. */
4515 legitimize_pic_address (rtx orig, rtx reg)
4518 rtx addend = const0_rtx;
4521 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4523 if (GET_CODE (addr) == CONST)
4524 addr = XEXP (addr, 0);
4526 if (GET_CODE (addr) == PLUS)
4528 addend = XEXP (addr, 1);
4529 addr = XEXP (addr, 0);
4532 if ((GET_CODE (addr) == LABEL_REF
4533 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4534 || (GET_CODE (addr) == UNSPEC &&
4535 (XINT (addr, 1) == UNSPEC_GOTENT
4536 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4537 && GET_CODE (addend) == CONST_INT)
4539 /* This can be locally addressed. */
4541 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4542 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4543 gen_rtx_CONST (Pmode, addr) : addr);
4545 if (TARGET_CPU_ZARCH
4546 && larl_operand (const_addr, VOIDmode)
4547 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4548 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4550 if (INTVAL (addend) & 1)
4552 /* LARL can't handle odd offsets, so emit a pair of LARL and LA.  */
4554 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4556 if (!DISP_IN_RANGE (INTVAL (addend)))
4558 HOST_WIDE_INT even = INTVAL (addend) - 1;
4559 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4560 addr = gen_rtx_CONST (Pmode, addr);
4561 addend = const1_rtx;
4564 emit_move_insn (temp, addr);
4565 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4569 s390_load_address (reg, new_rtx);
4575 /* If the offset is even, we can just use LARL. This
4576 will happen automatically. */
4581 /* No larl - Access local symbols relative to the GOT. */
4583 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4585 if (reload_in_progress || reload_completed)
4586 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4588 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4589 if (addend != const0_rtx)
4590 addr = gen_rtx_PLUS (Pmode, addr, addend);
4591 addr = gen_rtx_CONST (Pmode, addr);
4592 addr = force_const_mem (Pmode, addr);
4593 emit_move_insn (temp, addr);
4595 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4598 s390_load_address (reg, new_rtx);
4603 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4605 /* A non-local symbol reference without addend.
4607 The symbol ref is wrapped into an UNSPEC to make sure the
4608 proper operand modifier (@GOT or @GOTENT) will be emitted.
4609 This will tell the linker to put the symbol into the GOT.
4611 Additionally the code dereferencing the GOT slot is emitted here.
4613 An addend to the symref needs to be added afterwards.
4614 legitimize_pic_address calls itself recursively to handle
4615 that case. So no need to do it here. */
4618 reg = gen_reg_rtx (Pmode);
4622 /* Use load relative if possible.
4623 lgrl <target>, sym@GOTENT */
4624 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4625 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4626 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4628 emit_move_insn (reg, new_rtx);
4631 else if (flag_pic == 1)
4633 /* Assume GOT offset is a valid displacement operand (< 4k
4634 or < 512k with z990). This is handled the same way in
4635 both 31- and 64-bit code (@GOT).
4636 lg <target>, sym@GOT(r12) */
4638 if (reload_in_progress || reload_completed)
4639 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4641 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4642 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4643 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4644 new_rtx = gen_const_mem (Pmode, new_rtx);
4645 emit_move_insn (reg, new_rtx);
4648 else if (TARGET_CPU_ZARCH)
4650 /* If the GOT offset might be >= 4k, we determine the position
4651 of the GOT entry via a PC-relative LARL (@GOTENT).
4652 larl temp, sym@GOTENT
4653 lg <target>, 0(temp) */
4655 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4657 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4658 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4660 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4661 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4662 emit_move_insn (temp, new_rtx);
4664 new_rtx = gen_const_mem (Pmode, temp);
4665 emit_move_insn (reg, new_rtx);
4671 /* If the GOT offset might be >= 4k, we have to load it
4672 from the literal pool (@GOT).
4674 lg temp, lit-litbase(r13)
4675 lg <target>, 0(temp)
4676 lit: .long sym@GOT */
4678 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4680 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4681 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4683 if (reload_in_progress || reload_completed)
4684 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4686 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4687 addr = gen_rtx_CONST (Pmode, addr);
4688 addr = force_const_mem (Pmode, addr);
4689 emit_move_insn (temp, addr);
4691 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4692 new_rtx = gen_const_mem (Pmode, new_rtx);
4693 emit_move_insn (reg, new_rtx);
4697 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4699 gcc_assert (XVECLEN (addr, 0) == 1);
4700 switch (XINT (addr, 1))
4702 /* These address symbols (or PLT slots) relative to the GOT
4703 (not GOT slots!).  In general this will exceed the
4704 displacement range, so these values belong in the literal pool.  */
4708 new_rtx = force_const_mem (Pmode, orig);
4711 /* For -fPIC the GOT size might exceed the displacement
4712 range so make sure the value is in the literal pool. */
4715 new_rtx = force_const_mem (Pmode, orig);
4718 /* For @GOTENT larl is used.  This is handled like local symbol refs.  */
4724 /* @PLT is OK as-is on 64-bit but must be converted to
4725 GOT-relative @PLTOFF on 31-bit.  */
4727 if (!TARGET_CPU_ZARCH)
4729 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4731 if (reload_in_progress || reload_completed)
4732 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4734 addr = XVECEXP (addr, 0, 0);
4735 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4737 if (addend != const0_rtx)
4738 addr = gen_rtx_PLUS (Pmode, addr, addend);
4739 addr = gen_rtx_CONST (Pmode, addr);
4740 addr = force_const_mem (Pmode, addr);
4741 emit_move_insn (temp, addr);
4743 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4746 s390_load_address (reg, new_rtx);
4751 /* On 64 bit larl can be used. This case is handled like
4752 local symbol refs. */
4756 /* Everything else cannot happen. */
4761 else if (addend != const0_rtx)
4763 /* Otherwise, compute the sum. */
4765 rtx base = legitimize_pic_address (addr, reg);
4766 new_rtx = legitimize_pic_address (addend,
4767 base == reg ? NULL_RTX : reg);
4768 if (GET_CODE (new_rtx) == CONST_INT)
4769 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4772 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4774 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4775 new_rtx = XEXP (new_rtx, 1);
4777 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4780 if (GET_CODE (new_rtx) == CONST)
4781 new_rtx = XEXP (new_rtx, 0);
4782 new_rtx = force_operand (new_rtx, 0);
4788 /* Load the thread pointer into a register. */
4791 s390_get_thread_pointer (void)
4793 rtx tp = gen_reg_rtx (Pmode);
4795 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4796 mark_reg_pointer (tp, BITS_PER_WORD);
4801 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4802 in s390_tls_symbol which always refers to __tls_get_offset.
4803 The returned offset is written to RESULT_REG and a USE rtx is
4804 generated for TLS_CALL. */
4806 static GTY(()) rtx s390_tls_symbol;
4809 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4814 emit_insn (s390_load_got ());
4816 if (!s390_tls_symbol)
4817 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4819 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4820 gen_rtx_REG (Pmode, RETURN_REGNUM));
4822 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4823 RTL_CONST_CALL_P (insn) = 1;
4826 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4827 this (thread-local) address. REG may be used as temporary. */
4830 legitimize_tls_address (rtx addr, rtx reg)
4832 rtx new_rtx, tls_call, temp, base, r2;
4835 if (GET_CODE (addr) == SYMBOL_REF)
4836 switch (tls_symbolic_operand (addr))
4838 case TLS_MODEL_GLOBAL_DYNAMIC:
4840 r2 = gen_rtx_REG (Pmode, 2);
4841 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4842 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4843 new_rtx = force_const_mem (Pmode, new_rtx);
4844 emit_move_insn (r2, new_rtx);
4845 s390_emit_tls_call_insn (r2, tls_call);
4846 insn = get_insns ();
4849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4850 temp = gen_reg_rtx (Pmode);
4851 emit_libcall_block (insn, temp, r2, new_rtx);
4853 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4856 s390_load_address (reg, new_rtx);
4861 case TLS_MODEL_LOCAL_DYNAMIC:
4863 r2 = gen_rtx_REG (Pmode, 2);
4864 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4865 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4866 new_rtx = force_const_mem (Pmode, new_rtx);
4867 emit_move_insn (r2, new_rtx);
4868 s390_emit_tls_call_insn (r2, tls_call);
4869 insn = get_insns ();
4872 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4873 temp = gen_reg_rtx (Pmode);
4874 emit_libcall_block (insn, temp, r2, new_rtx);
4876 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4877 base = gen_reg_rtx (Pmode);
4878 s390_load_address (base, new_rtx);
4880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4882 new_rtx = force_const_mem (Pmode, new_rtx);
4883 temp = gen_reg_rtx (Pmode);
4884 emit_move_insn (temp, new_rtx);
4886 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4889 s390_load_address (reg, new_rtx);
4894 case TLS_MODEL_INITIAL_EXEC:
4897 /* Assume GOT offset < 4k. This is handled the same way
4898 in both 31- and 64-bit code. */
4900 if (reload_in_progress || reload_completed)
4901 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4903 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4904 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4905 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4906 new_rtx = gen_const_mem (Pmode, new_rtx);
4907 temp = gen_reg_rtx (Pmode);
4908 emit_move_insn (temp, new_rtx);
4910 else if (TARGET_CPU_ZARCH)
4912 /* If the GOT offset might be >= 4k, we determine the position
4913 of the GOT entry via a PC-relative LARL. */
4915 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4916 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4917 temp = gen_reg_rtx (Pmode);
4918 emit_move_insn (temp, new_rtx);
4920 new_rtx = gen_const_mem (Pmode, temp);
4921 temp = gen_reg_rtx (Pmode);
4922 emit_move_insn (temp, new_rtx);
4926 /* If the GOT offset might be >= 4k, we have to load it
4927 from the literal pool. */
4929 if (reload_in_progress || reload_completed)
4930 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4932 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4933 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4934 new_rtx = force_const_mem (Pmode, new_rtx);
4935 temp = gen_reg_rtx (Pmode);
4936 emit_move_insn (temp, new_rtx);
4938 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4939 new_rtx = gen_const_mem (Pmode, new_rtx);
4941 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4942 temp = gen_reg_rtx (Pmode);
4943 emit_insn (gen_rtx_SET (temp, new_rtx));
4947 /* In position-dependent code, load the absolute address of
4948 the GOT entry from the literal pool. */
4950 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4951 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4952 new_rtx = force_const_mem (Pmode, new_rtx);
4953 temp = gen_reg_rtx (Pmode);
4954 emit_move_insn (temp, new_rtx);
4957 new_rtx = gen_const_mem (Pmode, new_rtx);
4958 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4959 temp = gen_reg_rtx (Pmode);
4960 emit_insn (gen_rtx_SET (temp, new_rtx));
4963 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4966 s390_load_address (reg, new_rtx);
4971 case TLS_MODEL_LOCAL_EXEC:
4972 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4973 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4974 new_rtx = force_const_mem (Pmode, new_rtx);
4975 temp = gen_reg_rtx (Pmode);
4976 emit_move_insn (temp, new_rtx);
4978 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4981 s390_load_address (reg, new_rtx);
4990 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4992 switch (XINT (XEXP (addr, 0), 1))
4994 case UNSPEC_INDNTPOFF:
4995 gcc_assert (TARGET_CPU_ZARCH);
5004 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5005 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5007 new_rtx = XEXP (XEXP (addr, 0), 0);
5008 if (GET_CODE (new_rtx) != SYMBOL_REF)
5009 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011 new_rtx = legitimize_tls_address (new_rtx, reg);
5012 new_rtx = plus_constant (Pmode, new_rtx,
5013 INTVAL (XEXP (XEXP (addr, 0), 1)));
5014 new_rtx = force_operand (new_rtx, 0);
5018 gcc_unreachable (); /* for now ... */
5023 /* Emit insns making the address in operands[1] valid for a standard
5024 move to operands[0]. operands[1] is replaced by an address which
5025 should be used instead of the former RTX to emit the move pattern. */
5029 emit_symbolic_move (rtx *operands)
5031 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5033 if (GET_CODE (operands[0]) == MEM)
5034 operands[1] = force_reg (Pmode, operands[1]);
5035 else if (TLS_SYMBOLIC_CONST (operands[1]))
5036 operands[1] = legitimize_tls_address (operands[1], temp);
5038 operands[1] = legitimize_pic_address (operands[1], temp);
5041 /* Try machine-dependent ways of modifying an illegitimate address X
5042 to be legitimate. If we find one, return the new, valid address.
5044 OLDX is the address as it was before break_out_memory_refs was called.
5045 In some cases it is useful to look at this to decide what needs to be done.
5047 MODE is the mode of the operand pointed to by X.
5049 When -fpic is used, special handling is needed for symbolic references.
5050 See comments by legitimize_pic_address for details. */
5053 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5054 machine_mode mode ATTRIBUTE_UNUSED)
5056 rtx constant_term = const0_rtx;
5058 if (TLS_SYMBOLIC_CONST (x))
5060 x = legitimize_tls_address (x, 0);
5062 if (s390_legitimate_address_p (mode, x, FALSE))
5065 else if (GET_CODE (x) == PLUS
5066 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5067 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5073 if (SYMBOLIC_CONST (x)
5074 || (GET_CODE (x) == PLUS
5075 && (SYMBOLIC_CONST (XEXP (x, 0))
5076 || SYMBOLIC_CONST (XEXP (x, 1)))))
5077 x = legitimize_pic_address (x, 0);
5079 if (s390_legitimate_address_p (mode, x, FALSE))
5083 x = eliminate_constant_term (x, &constant_term);
5085 /* Optimize loading of large displacements by splitting them
5086 into the multiple of 4K and the rest; this allows the
5087 former to be CSE'd if possible.
5089 Don't do this if the displacement is added to a register
5090 pointing into the stack frame, as the offsets will
5091 change later anyway. */
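/* A worked example (assuming a 12-bit unsigned displacement field):
   for constant_term == 0x12345 we get lower == 0x12345 & 0xfff == 0x345
   and upper == 0x12345 ^ 0x345 == 0x12000.  The 0x12000 part is loaded
   into a pseudo below (and can be CSE'd with nearby accesses), while
   0x345 stays in range of the displacement field.  */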
5093 if (GET_CODE (constant_term) == CONST_INT
5094 && !TARGET_LONG_DISPLACEMENT
5095 && !DISP_IN_RANGE (INTVAL (constant_term))
5096 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5098 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5099 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5101 rtx temp = gen_reg_rtx (Pmode);
5102 rtx val = force_operand (GEN_INT (upper), temp);
5104 emit_move_insn (temp, val);
5106 x = gen_rtx_PLUS (Pmode, x, temp);
5107 constant_term = GEN_INT (lower);
5110 if (GET_CODE (x) == PLUS)
5112 if (GET_CODE (XEXP (x, 0)) == REG)
5114 rtx temp = gen_reg_rtx (Pmode);
5115 rtx val = force_operand (XEXP (x, 1), temp);
5117 emit_move_insn (temp, val);
5119 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5122 else if (GET_CODE (XEXP (x, 1)) == REG)
5124 rtx temp = gen_reg_rtx (Pmode);
5125 rtx val = force_operand (XEXP (x, 0), temp);
5127 emit_move_insn (temp, val);
5129 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5133 if (constant_term != const0_rtx)
5134 x = gen_rtx_PLUS (Pmode, x, constant_term);
5139 /* Try a machine-dependent way of reloading an illegitimate address AD
5140 operand. If we find one, push the reload and return the new address.
5142 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5143 and TYPE is the reload type of the current reload. */
5146 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5147 int opnum, int type)
5149 if (!optimize || TARGET_LONG_DISPLACEMENT)
5152 if (GET_CODE (ad) == PLUS)
5154 rtx tem = simplify_binary_operation (PLUS, Pmode,
5155 XEXP (ad, 0), XEXP (ad, 1));
5160 if (GET_CODE (ad) == PLUS
5161 && GET_CODE (XEXP (ad, 0)) == REG
5162 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5163 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5165 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5166 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5167 rtx cst, tem, new_rtx;
5169 cst = GEN_INT (upper);
5170 if (!legitimate_reload_constant_p (cst))
5171 cst = force_const_mem (Pmode, cst);
5173 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5174 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
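/* Sketch of the resulting shape: for AD == (plus (reg) (const_int
   0x12345)) we have TEM == (plus (reg) (const_int 0x12000)) and
   NEW_RTX == (plus TEM (const_int 0x345)).  The reload pushed below
   forces the out-of-range 0x12000 addend into a base register,
   leaving a displacement that fits in 12 bits.  */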
5176 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5177 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5178 opnum, (enum reload_type) type);
5185 /* Emit code to move LEN bytes from SRC to DST. */
5188 s390_expand_movmem (rtx dst, rtx src, rtx len)
5190 /* When tuning for z10 or higher we rely on the Glibc functions to
5191 do the right thing. Only for constant lengths below 64k do we
5192 generate inline code. */
5193 if (s390_tune >= PROCESSOR_2097_Z10
5194 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5197 /* Expand memcpy for constant length operands without a loop if it
5198 is shorter that way.
5200 With a constant length argument a
5201 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
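/* E.g., a constant len of 600 yields three mvcs below covering
   256 + 256 + 88 bytes; mvc encodes the length minus one, hence
   GEN_INT (l > 256 ? 255 : l - 1).  */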
5202 if (GET_CODE (len) == CONST_INT
5203 && INTVAL (len) >= 0
5204 && INTVAL (len) <= 256 * 6
5205 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5209 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5211 rtx newdst = adjust_address (dst, BLKmode, o);
5212 rtx newsrc = adjust_address (src, BLKmode, o);
5213 emit_insn (gen_movmem_short (newdst, newsrc,
5214 GEN_INT (l > 256 ? 255 : l - 1)));
5218 else if (TARGET_MVCLE)
5220 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5225 rtx dst_addr, src_addr, count, blocks, temp;
5226 rtx_code_label *loop_start_label = gen_label_rtx ();
5227 rtx_code_label *loop_end_label = gen_label_rtx ();
5228 rtx_code_label *end_label = gen_label_rtx ();
5231 mode = GET_MODE (len);
5232 if (mode == VOIDmode)
5235 dst_addr = gen_reg_rtx (Pmode);
5236 src_addr = gen_reg_rtx (Pmode);
5237 count = gen_reg_rtx (mode);
5238 blocks = gen_reg_rtx (mode);
5240 convert_move (count, len, 1);
5241 emit_cmp_and_jump_insns (count, const0_rtx,
5242 EQ, NULL_RTX, mode, 1, end_label);
5244 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5245 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5246 dst = change_address (dst, VOIDmode, dst_addr);
5247 src = change_address (src, VOIDmode, src_addr);
5249 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, OPTAB_DIRECT);
5252 emit_move_insn (count, temp);
5254 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT);
5257 emit_move_insn (blocks, temp);
5259 emit_cmp_and_jump_insns (blocks, const0_rtx,
5260 EQ, NULL_RTX, mode, 1, loop_end_label);
5262 emit_label (loop_start_label);
5265 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5269 /* Issue a read prefetch for the +3 cache line. */
5270 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5271 const0_rtx, const0_rtx);
5272 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5273 emit_insn (prefetch);
5275 /* Issue a write prefetch for the +3 cache line. */
5276 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5277 const1_rtx, const0_rtx);
5278 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5279 emit_insn (prefetch);
5282 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5283 s390_load_address (dst_addr,
5284 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5285 s390_load_address (src_addr,
5286 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5288 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT);
5291 emit_move_insn (blocks, temp);
5293 emit_cmp_and_jump_insns (blocks, const0_rtx,
5294 EQ, NULL_RTX, mode, 1, loop_end_label);
5296 emit_jump (loop_start_label);
5297 emit_label (loop_end_label);
5299 emit_insn (gen_movmem_short (dst, src,
5300 convert_to_mode (Pmode, count, 1)));
5301 emit_label (end_label);
5306 /* Emit code to set LEN bytes at DST to VAL.
5307 Make use of clrmem if VAL is zero. */
5310 s390_expand_setmem (rtx dst, rtx len, rtx val)
5312 const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5314 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5317 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5319 /* Expand setmem/clrmem for a constant length operand without a
5320 loop if it will be shorter that way.
5321 With a constant length and without pfd argument a
5322 clrmem loop is 32 bytes -> 5.3 * xc
5323 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
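/* The clrmem path relies on xc of a block with itself yielding
   zeros, roughly "xc 0(256,%r1),0(%r1)" per 256 byte chunk (the
   operands here are illustrative only).  */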
5324 if (GET_CODE (len) == CONST_INT
5325 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5326 || INTVAL (len) <= 257 * 3)
5327 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5331 if (val == const0_rtx)
5332 /* clrmem: emit 256 byte blockwise XCs. */
5333 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5335 rtx newdst = adjust_address (dst, BLKmode, o);
5336 emit_insn (gen_clrmem_short (newdst,
5337 GEN_INT (l > 256 ? 255 : l - 1)));
5340 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5341 setting first byte to val and using a 256 byte mvc with one
5342 byte overlap to propagate the byte. */
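/* Illustrative sketch of one emitted chunk (register use is an
   assumption): "mvi 0(%r1),<val>" sets the first byte, and the
   following "mvc 1(255,%r1),0(%r1)" copies byte by byte with one
   byte overlap, so <val> ripples through the whole 256 byte
   block.  */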
5343 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5345 rtx newdst = adjust_address (dst, BLKmode, o);
5346 emit_move_insn (adjust_address (dst, QImode, o), val);
5349 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5350 emit_insn (gen_movmem_short (newdstp1, newdst,
5351 GEN_INT (l > 257 ? 255 : l - 2)));
5356 else if (TARGET_MVCLE)
5358 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5360 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1), val));
5363 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1), val));
5369 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5370 rtx_code_label *loop_start_label = gen_label_rtx ();
5371 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5372 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5373 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5376 mode = GET_MODE (len);
5377 if (mode == VOIDmode)
5380 dst_addr = gen_reg_rtx (Pmode);
5381 count = gen_reg_rtx (mode);
5382 blocks = gen_reg_rtx (mode);
5384 convert_move (count, len, 1);
5385 emit_cmp_and_jump_insns (count, const0_rtx,
5386 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5389 /* We need to make a copy of the target address since memset is
5390 supposed to return it unmodified. We have to make it here
5391 already since the new reg is used at onebyte_end_label. */
5392 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5393 dst = change_address (dst, VOIDmode, dst_addr);
5395 if (val != const0_rtx)
5397 /* When using the overlapping mvc the original target
5398 address is only accessed as a single byte entity (even by
5399 the mvc reading this value). */
5400 set_mem_size (dst, 1);
5401 dstp1 = adjust_address (dst, VOIDmode, 1);
5402 emit_cmp_and_jump_insns (count,
5403 const1_rtx, EQ, NULL_RTX, mode, 1,
5404 onebyte_end_label, very_unlikely);
5407 /* There is one unconditional (mvi+mvc)/xc after the loop
5408 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5409 or one (xc) here leaves this number of bytes to be handled by it. */
5411 temp = expand_binop (mode, add_optab, count,
5412 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5413 count, 1, OPTAB_DIRECT);
5415 emit_move_insn (count, temp);
5417 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT);
5420 emit_move_insn (blocks, temp);
5422 emit_cmp_and_jump_insns (blocks, const0_rtx,
5423 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5425 emit_jump (loop_start_label);
5427 if (val != const0_rtx)
5429 /* The 1 byte != 0 special case. Not handled efficiently
5430 since we require two jumps for that. However, this
5431 should be very rare. */
5432 emit_label (onebyte_end_label);
5433 emit_move_insn (adjust_address (dst, QImode, 0), val);
5434 emit_jump (zerobyte_end_label);
5437 emit_label (loop_start_label);
5440 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5442 /* Issue a write prefetch for the +4 cache line. */
5443 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (1024)),
5445 const1_rtx, const0_rtx);
5446 emit_insn (prefetch);
5447 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5450 if (val == const0_rtx)
5451 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5454 /* Set the first byte in the block to the value and use an
5455 overlapping mvc for the block. */
5456 emit_move_insn (adjust_address (dst, QImode, 0), val);
5457 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5459 s390_load_address (dst_addr,
5460 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5462 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT);
5465 emit_move_insn (blocks, temp);
5467 emit_cmp_and_jump_insns (blocks, const0_rtx,
5468 NE, NULL_RTX, mode, 1, loop_start_label);
5470 emit_label (restbyte_end_label);
5472 if (val == const0_rtx)
5473 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5476 /* Set the first byte in the block to the value and use an
5477 overlapping mvc for the block. */
5478 emit_move_insn (adjust_address (dst, QImode, 0), val);
5479 /* execute only uses the lowest 8 bits of count, which is
5480 exactly what we need here. */
5481 emit_insn (gen_movmem_short (dstp1, dst,
5482 convert_to_mode (Pmode, count, 1)));
5485 emit_label (zerobyte_end_label);
5489 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5490 and return the result in TARGET. */
5493 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5495 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5498 /* When tuning for z10 or higher we rely on the Glibc functions to
5499 do the right thing. Only for constant lengths below 64k do we
5500 generate inline code. */
5501 if (s390_tune >= PROCESSOR_2097_Z10
5502 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5505 /* As the result of CMPINT is inverted compared to what we need,
5506 we have to swap the operands. */
5507 tmp = op0; op0 = op1; op1 = tmp;
5509 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5511 if (INTVAL (len) > 0)
5513 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5514 emit_insn (gen_cmpint (target, ccreg));
5517 emit_move_insn (target, const0_rtx);
5519 else if (TARGET_MVCLE)
5521 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5522 emit_insn (gen_cmpint (target, ccreg));
5526 rtx addr0, addr1, count, blocks, temp;
5527 rtx_code_label *loop_start_label = gen_label_rtx ();
5528 rtx_code_label *loop_end_label = gen_label_rtx ();
5529 rtx_code_label *end_label = gen_label_rtx ();
5532 mode = GET_MODE (len);
5533 if (mode == VOIDmode)
5536 addr0 = gen_reg_rtx (Pmode);
5537 addr1 = gen_reg_rtx (Pmode);
5538 count = gen_reg_rtx (mode);
5539 blocks = gen_reg_rtx (mode);
5541 convert_move (count, len, 1);
5542 emit_cmp_and_jump_insns (count, const0_rtx,
5543 EQ, NULL_RTX, mode, 1, end_label);
5545 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5546 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5547 op0 = change_address (op0, VOIDmode, addr0);
5548 op1 = change_address (op1, VOIDmode, addr1);
5550 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, OPTAB_DIRECT);
5553 emit_move_insn (count, temp);
5555 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT);
5558 emit_move_insn (blocks, temp);
5560 emit_cmp_and_jump_insns (blocks, const0_rtx,
5561 EQ, NULL_RTX, mode, 1, loop_end_label);
5563 emit_label (loop_start_label);
5566 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5570 /* Issue a read prefetch for the +2 cache line of operand 1. */
5571 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5572 const0_rtx, const0_rtx);
5573 emit_insn (prefetch);
5574 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5576 /* Issue a read prefetch for the +2 cache line of operand 2. */
5577 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5578 const0_rtx, const0_rtx);
5579 emit_insn (prefetch);
5580 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5583 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5584 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5585 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5586 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5587 temp = gen_rtx_SET (pc_rtx, temp);
5588 emit_jump_insn (temp);
5590 s390_load_address (addr0,
5591 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5592 s390_load_address (addr1,
5593 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5595 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT);
5598 emit_move_insn (blocks, temp);
5600 emit_cmp_and_jump_insns (blocks, const0_rtx,
5601 EQ, NULL_RTX, mode, 1, loop_end_label);
5603 emit_jump (loop_start_label);
5604 emit_label (loop_end_label);
5606 emit_insn (gen_cmpmem_short (op0, op1,
5607 convert_to_mode (Pmode, count, 1)));
5608 emit_label (end_label);
5610 emit_insn (gen_cmpint (target, ccreg));
5615 /* Emit a conditional jump to LABEL for condition code mask MASK using
5616 comparison operator COMPARISON. Return the emitted jump insn. */
5619 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5623 gcc_assert (comparison == EQ || comparison == NE);
5624 gcc_assert (mask > 0 && mask < 15);
5626 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5627 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5628 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5629 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5630 temp = gen_rtx_SET (pc_rtx, temp);
5631 return emit_jump_insn (temp);
5634 /* Emit the instructions to implement strlen of STRING and store the
5635 result in TARGET. The string has the known ALIGNMENT. This
5636 version uses vector instructions and is therefore not appropriate
5637 for targets prior to z13. */
5640 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5642 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5643 int very_likely = REG_BR_PROB_BASE - 1;
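/* With REG_BR_PROB_BASE == 10000 this makes very_unlikely == 99
   (about 1%) and very_likely == 9999 (about 99.99%).  */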
5644 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5645 rtx str_reg = gen_reg_rtx (V16QImode);
5646 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5647 rtx str_idx_reg = gen_reg_rtx (Pmode);
5648 rtx result_reg = gen_reg_rtx (V16QImode);
5649 rtx is_aligned_label = gen_label_rtx ();
5650 rtx into_loop_label = NULL_RTX;
5651 rtx loop_start_label = gen_label_rtx ();
5653 rtx len = gen_reg_rtx (QImode);
5656 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5657 emit_move_insn (str_idx_reg, const0_rtx);
5659 if (INTVAL (alignment) < 16)
5661 /* Check whether the address happens to be aligned properly so
5662 jump directly to the aligned loop. */
5663 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5664 str_addr_base_reg, GEN_INT (15)),
5665 const0_rtx, EQ, NULL_RTX,
5666 Pmode, 1, is_aligned_label);
5668 temp = gen_reg_rtx (Pmode);
5669 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5670 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5671 gcc_assert (REG_P (temp));
5672 highest_index_to_load_reg =
5673 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5674 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5675 gcc_assert (REG_P (highest_index_to_load_reg));
5676 emit_insn (gen_vllv16qi (str_reg,
5677 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5678 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5680 into_loop_label = gen_label_rtx ();
5681 s390_emit_jump (into_loop_label, NULL_RTX);
5685 emit_label (is_aligned_label);
5686 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5688 /* Reaching this point we are only performing 16 byte aligned loads. */
5690 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5692 emit_label (loop_start_label);
5693 LABEL_NUSES (loop_start_label) = 1;
5695 /* Load 16 bytes of the string into VR. */
5696 emit_move_insn (str_reg,
5697 gen_rtx_MEM (V16QImode,
5698 gen_rtx_PLUS (Pmode, str_idx_reg,
5699 str_addr_base_reg)));
5700 if (into_loop_label != NULL_RTX)
5702 emit_label (into_loop_label);
5703 LABEL_NUSES (into_loop_label) = 1;
5706 /* Increment string index by 16 bytes. */
5707 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5708 str_idx_reg, 1, OPTAB_DIRECT);
5710 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5711 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5713 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5714 REG_BR_PROB, very_likely);
5715 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5717 /* If the string pointer wasn't aligned we have loaded less than 16
5718 bytes and the remaining bytes got filled with zeros (by vll).
5719 Now we have to check whether the resulting index lies within the
5720 bytes actually part of the string. */
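/* Worked example: for a string at an address with addr % 16 == 12,
   highest_index_to_load_reg was set to 15 - 12 == 3 above, so vll
   loaded only bytes 0..3.  If vfene reports a zero at an index
   greater than 3, that zero came from the vll fill rather than
   from the string, and the search continues in the aligned loop.  */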
5722 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5723 highest_index_to_load_reg);
5724 s390_load_address (highest_index_to_load_reg,
5725 gen_rtx_PLUS (Pmode, highest_index_to_load_reg, const1_rtx));
5728 emit_insn (gen_movdicc (str_idx_reg, cond,
5729 highest_index_to_load_reg, str_idx_reg));
5731 emit_insn (gen_movsicc (str_idx_reg, cond,
5732 highest_index_to_load_reg, str_idx_reg));
5734 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5737 expand_binop (Pmode, add_optab, str_idx_reg,
5738 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5739 /* FIXME: len is already zero extended - so avoid the llgcr emitted by the conversion below. */
5741 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5742 convert_to_mode (Pmode, len, 1),
5743 target, 1, OPTAB_DIRECT);
5745 emit_move_insn (target, temp);
5749 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5751 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5752 rtx temp = gen_reg_rtx (Pmode);
5753 rtx src_addr = XEXP (src, 0);
5754 rtx dst_addr = XEXP (dst, 0);
5755 rtx src_addr_reg = gen_reg_rtx (Pmode);
5756 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5757 rtx offset = gen_reg_rtx (Pmode);
5758 rtx vsrc = gen_reg_rtx (V16QImode);
5759 rtx vpos = gen_reg_rtx (V16QImode);
5760 rtx loadlen = gen_reg_rtx (SImode);
5761 rtx gpos_qi = gen_reg_rtx (QImode);
5762 rtx gpos = gen_reg_rtx (SImode);
5763 rtx done_label = gen_label_rtx ();
5764 rtx loop_label = gen_label_rtx ();
5765 rtx exit_label = gen_label_rtx ();
5766 rtx full_label = gen_label_rtx ();
5768 /* Perform a quick check for a string ending within the first up to 16
5769 bytes and exit early if successful. */
5771 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5772 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5773 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5774 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5775 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5776 /* gpos is the byte index if a zero was found and 16 otherwise.
5777 So if it is lower than the loaded bytes we have a hit. */
5778 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5780 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5782 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5784 emit_jump (exit_label);
5787 emit_label (full_label);
5788 LABEL_NUSES (full_label) = 1;
5790 /* Calculate `offset' so that src + offset points to the last byte
5791 before 16 byte alignment. */
5793 /* temp = src_addr & 0xf */
5794 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5797 /* offset = 0xf - temp */
5798 emit_move_insn (offset, GEN_INT (15));
5799 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5802 /* Store `offset' bytes in the destination string. The quick check
5803 has loaded at least `offset' bytes into vsrc. */
5805 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5807 /* Advance to the next byte to be loaded. */
5808 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5811 /* Make sure the addresses are single regs which can be used as a base address. */
5813 emit_move_insn (src_addr_reg, src_addr);
5814 emit_move_insn (dst_addr_reg, dst_addr);
5818 emit_label (loop_label);
5819 LABEL_NUSES (loop_label) = 1;
5821 emit_move_insn (vsrc,
5822 gen_rtx_MEM (V16QImode,
5823 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5825 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5826 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5827 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5828 REG_BR_PROB, very_unlikely);
5830 emit_move_insn (gen_rtx_MEM (V16QImode,
5831 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5834 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5835 offset, 1, OPTAB_DIRECT);
5837 emit_jump (loop_label);
5842 /* We are done. Add the offset of the zero character to the dst_addr
5843 pointer to get the result. */
5845 emit_label (done_label);
5846 LABEL_NUSES (done_label) = 1;
5848 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5851 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5852 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5854 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5856 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5861 emit_label (exit_label);
5862 LABEL_NUSES (exit_label) = 1;
5866 /* Expand conditional increment or decrement using alc/slb instructions.
5867 Should generate code setting DST to either SRC or SRC + INCREMENT,
5868 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5869 Returns true if successful, false otherwise.
5871 That makes it possible to implement some if-constructs without jumps e.g.:
5872 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5873 unsigned int a, b, c;
5874 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5875 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5876 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5877 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5879 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5880 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5881 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5882 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5883 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5886 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5887 rtx dst, rtx src, rtx increment)
5889 machine_mode cmp_mode;
5890 machine_mode cc_mode;
5896 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5897 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5899 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5900 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5905 /* Try ADD LOGICAL WITH CARRY. */
5906 if (increment == const1_rtx)
5908 /* Determine CC mode to use. */
5909 if (cmp_code == EQ || cmp_code == NE)
5911 if (cmp_op1 != const0_rtx)
5913 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5914 NULL_RTX, 0, OPTAB_WIDEN);
5915 cmp_op1 = const0_rtx;
5918 cmp_code = cmp_code == EQ ? LEU : GTU;
5921 if (cmp_code == LTU || cmp_code == LEU)
5926 cmp_code = swap_condition (cmp_code);
5943 /* Emit comparison instruction pattern. */
5944 if (!register_operand (cmp_op0, cmp_mode))
5945 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5947 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5948 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5949 /* We use insn_invalid_p here to add clobbers if required. */
5950 ret = insn_invalid_p (emit_insn (insn), false);
5953 /* Emit ALC instruction pattern. */
5954 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5955 gen_rtx_REG (cc_mode, CC_REGNUM),
5958 if (src != const0_rtx)
5960 if (!register_operand (src, GET_MODE (dst)))
5961 src = force_reg (GET_MODE (dst), src);
5963 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5964 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5967 p = rtvec_alloc (2);
5969 RTVEC_ELT (p, 0) = gen_rtx_SET (dst, op_res);
5971 RTVEC_ELT (p, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5972 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5977 /* Try SUBTRACT LOGICAL WITH BORROW. */
5978 if (increment == constm1_rtx)
5980 /* Determine CC mode to use. */
5981 if (cmp_code == EQ || cmp_code == NE)
5983 if (cmp_op1 != const0_rtx)
5985 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5986 NULL_RTX, 0, OPTAB_WIDEN);
5987 cmp_op1 = const0_rtx;
5990 cmp_code = cmp_code == EQ ? LEU : GTU;
5993 if (cmp_code == GTU || cmp_code == GEU)
5998 cmp_code = swap_condition (cmp_code);
6015 /* Emit comparison instruction pattern. */
6016 if (!register_operand (cmp_op0, cmp_mode))
6017 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6019 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6020 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6021 /* We use insn_invalid_p here to add clobbers if required. */
6022 ret = insn_invalid_p (emit_insn (insn), false);
6025 /* Emit SLB instruction pattern. */
6026 if (!register_operand (src, GET_MODE (dst)))
6027 src = force_reg (GET_MODE (dst), src);
6029 op_res = gen_rtx_MINUS (GET_MODE (dst),
6030 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6031 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6032 gen_rtx_REG (cc_mode, CC_REGNUM),
6034 p = rtvec_alloc (2);
6036 RTVEC_ELT (p, 0) = gen_rtx_SET (dst, op_res);
6038 RTVEC_ELT (p, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6039 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6047 /* Expand code for the insv template. Return true if successful. */
6050 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6052 int bitsize = INTVAL (op1);
6053 int bitpos = INTVAL (op2);
6054 machine_mode mode = GET_MODE (dest);
6056 int smode_bsize, mode_bsize;
6059 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6062 /* Generate INSERT IMMEDIATE (IILL et al). */
6063 /* (set (ze (reg)) (const_int)). */
6065 && register_operand (dest, word_mode)
6066 && (bitpos % 16) == 0
6067 && (bitsize % 16) == 0
6068 && const_int_operand (src, VOIDmode))
6070 HOST_WIDE_INT val = INTVAL (src);
6071 int regpos = bitpos + bitsize;
6073 while (regpos > bitpos)
6075 machine_mode putmode;
6078 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6083 putsize = GET_MODE_BITSIZE (putmode);
6085 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6088 gen_int_mode (val, putmode));
6091 gcc_assert (regpos == bitpos);
6095 smode = smallest_mode_for_size (bitsize, MODE_INT);
6096 smode_bsize = GET_MODE_BITSIZE (smode);
6097 mode_bsize = GET_MODE_BITSIZE (mode);
6099 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6101 && (bitsize % BITS_PER_UNIT) == 0
6103 && (register_operand (src, word_mode)
6104 || const_int_operand (src, VOIDmode)))
6106 /* Emit standard pattern if possible. */
6107 if (smode_bsize == bitsize)
6109 emit_move_insn (adjust_address (dest, smode, 0),
6110 gen_lowpart (smode, src));
6114 /* (set (ze (mem)) (const_int)). */
6115 else if (const_int_operand (src, VOIDmode))
6117 int size = bitsize / BITS_PER_UNIT;
6118 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6120 UNITS_PER_WORD - size);
6122 dest = adjust_address (dest, BLKmode, 0);
6123 set_mem_size (dest, size);
6124 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6128 /* (set (ze (mem)) (reg)). */
6129 else if (register_operand (src, word_mode))
6132 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6136 /* Emit st,stcmh sequence. */
6137 int stcmh_width = bitsize - 32;
6138 int size = stcmh_width / BITS_PER_UNIT;
6140 emit_move_insn (adjust_address (dest, SImode, size),
6141 gen_lowpart (SImode, src));
6142 set_mem_size (dest, size);
6143 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6144 GEN_INT (stcmh_width),
6146 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6152 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6153 if ((bitpos % BITS_PER_UNIT) == 0
6154 && (bitsize % BITS_PER_UNIT) == 0
6155 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6157 && (mode == DImode || mode == SImode)
6158 && register_operand (dest, mode))
6160 /* Emit a strict_low_part pattern if possible. */
6161 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6163 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6164 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6165 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6166 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6170 /* ??? There are more powerful versions of ICM that are not
6171 completely represented in the md file. */
6174 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6175 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6177 machine_mode mode_s = GET_MODE (src);
6179 if (CONSTANT_P (src))
6181 /* For constant zero values the representation with AND
6182 appears to be folded in more situations than the (set
6183 (zero_extract) ...).
6184 We only do this when the start and end of the bitfield
6185 remain in the same SImode chunk. That way nihf or nilf can be used.
6187 The AND patterns might still generate a risbg for this. */
6188 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6191 src = force_reg (mode, src);
6193 else if (mode_s != mode)
6195 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6196 src = force_reg (mode_s, src);
6197 src = gen_lowpart (mode, src);
6200 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6201 op = gen_rtx_SET (op, src);
6205 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6206 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6216 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6217 register that holds VAL of mode MODE shifted by COUNT bits. */
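/* E.g., for MODE == QImode, VAL == 0x1ab and COUNT == 16 this
   returns a register holding (0x1ab & 0xff) << 16 == 0xab0000.  */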
6220 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6222 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6223 NULL_RTX, 1, OPTAB_DIRECT);
6224 return expand_simple_binop (SImode, ASHIFT, val, count,
6225 NULL_RTX, 1, OPTAB_DIRECT);
6228 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6229 the result in TARGET. */
6232 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6233 rtx cmp_op1, rtx cmp_op2)
6235 machine_mode mode = GET_MODE (target);
6236 bool neg_p = false, swap_p = false;
6239 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6243 /* NE a != b -> !(a == b) */
6244 case NE: cond = EQ; neg_p = true; break;
6245 /* UNGT a u> b -> !(b >= a) */
6246 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6247 /* UNGE a u>= b -> !(b > a) */
6248 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6249 /* LE: a <= b -> b >= a */
6250 case LE: cond = GE; swap_p = true; break;
6251 /* UNLE: a u<= b -> !(a > b) */
6252 case UNLE: cond = GT; neg_p = true; break;
6253 /* LT: a < b -> b > a */
6254 case LT: cond = GT; swap_p = true; break;
6255 /* UNLT: a u< b -> !(a >= b) */
6256 case UNLT: cond = GE; neg_p = true; break;
6258 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6261 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6264 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6267 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6276 /* NE: a != b -> !(a == b) */
6277 case NE: cond = EQ; neg_p = true; break;
6278 /* GE: a >= b -> !(b > a) */
6279 case GE: cond = GT; neg_p = true; swap_p = true; break;
6280 /* GEU: a >= b -> !(b > a) */
6281 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6282 /* LE: a <= b -> !(a > b) */
6283 case LE: cond = GT; neg_p = true; break;
6284 /* LEU: a <= b -> !(a > b) */
6285 case LEU: cond = GTU; neg_p = true; break;
6286 /* LT: a < b -> b > a */
6287 case LT: cond = GT; swap_p = true; break;
6288 /* LTU: a < b -> b > a */
6289 case LTU: cond = GTU; swap_p = true; break;
6296 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6299 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond, mode,
6301 cmp_op1, cmp_op2)));
6303 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6306 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6307 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6308 elements in CMP1 and CMP2 fulfill the comparison.
6309 This function is only used to emit patterns for the vx builtins and
6310 therefore only handles comparison codes required by those builtins. */
6313 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6314 rtx cmp1, rtx cmp2, bool all_p)
6316 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6317 rtx tmp_reg = gen_reg_rtx (SImode);
6318 bool swap_p = false;
6320 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6326 cc_producer_mode = CCVEQmode;
6330 code = swap_condition (code);
6335 cc_producer_mode = CCVIHmode;
6339 code = swap_condition (code);
6344 cc_producer_mode = CCVIHUmode;
6350 scratch_mode = GET_MODE (cmp1);
6351 /* These codes represent inverted CC interpretations. Inverting
6352 an ALL CC mode results in an ANY CC mode and the other way
6353 around. Invert the all_p flag here to compensate for that. */
6355 if (code == NE || code == LE || code == LEU)
6358 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6360 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6366 case EQ: cc_producer_mode = CCVEQmode; break;
6367 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6368 case GT: cc_producer_mode = CCVFHmode; break;
6369 case GE: cc_producer_mode = CCVFHEmode; break;
6370 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6371 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6372 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6373 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6374 default: gcc_unreachable ();
6376 scratch_mode = mode_for_vector (
6377 int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))),
6378 GET_MODE_NUNITS (GET_MODE (cmp1)));
6379 gcc_assert (scratch_mode != BLKmode);
6384 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6396 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6397 gen_rtvec (2, gen_rtx_SET (
6398 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6399 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6400 gen_rtx_CLOBBER (VOIDmode,
6401 gen_rtx_SCRATCH (scratch_mode)))));
6402 emit_move_insn (target, const0_rtx);
6403 emit_move_insn (tmp_reg, const1_rtx);
6405 emit_move_insn (target,
6406 gen_rtx_IF_THEN_ELSE (SImode,
6407 gen_rtx_fmt_ee (code, VOIDmode,
6408 gen_rtx_REG (cc_consumer_mode, CC_REGNUM), const0_rtx), tmp_reg, target));
6413 /* Invert the comparison CODE applied to a CC mode. This is only safe
6414 if we know whether the result was created by a floating point
6415 compare or not. For the CCV modes this is encoded as part of the mode. */
6418 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6420 /* Reversal of FP compares takes care -- an ordered compare
6421 becomes an unordered compare and vice versa. */
6422 if (mode == CCVFALLmode || mode == CCVFANYmode)
6423 return reverse_condition_maybe_unordered (code);
6424 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6425 return reverse_condition (code);
6430 /* Generate a vector comparison expression loading either elements of
6431 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 and CMP_OP2. */
6435 s390_expand_vcond (rtx target, rtx then, rtx els,
6436 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6439 machine_mode result_mode;
6442 machine_mode target_mode = GET_MODE (target);
6443 machine_mode cmp_mode = GET_MODE (cmp_op1);
6444 rtx op = (cond == LT) ? els : then;
6446 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6447 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6448 for short and byte (x >> 15 and x >> 7 respectively). */
6449 if ((cond == LT || cond == GE)
6450 && target_mode == cmp_mode
6451 && cmp_op2 == CONST0_RTX (cmp_mode)
6452 && op == CONST0_RTX (target_mode)
6453 && s390_vector_mode_supported_p (target_mode)
6454 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6456 rtx negop = (cond == LT) ? then : els;
6458 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6460 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6461 if (negop == CONST1_RTX (target_mode))
6463 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6464 GEN_INT (shift), target, 1, OPTAB_DIRECT);
6467 emit_move_insn (target, res);
6471 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6472 else if (all_ones_operand (negop, target_mode))
6474 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6475 GEN_INT (shift), target, 0, OPTAB_DIRECT);
6478 emit_move_insn (target, res);
6483 /* We always use an integral type vector to hold the comparison result. */
6485 result_mode = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (cmp_mode)),
6486 GET_MODE_NUNITS (cmp_mode));
6487 result_target = gen_reg_rtx (result_mode);
6489 /* We allow vector immediates as comparison operands that
6490 can be handled by the optimization above but not by the
6491 following code. Hence, force them into registers here. */
6492 if (!REG_P (cmp_op1))
6493 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6495 if (!REG_P (cmp_op2))
6496 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6498 s390_expand_vec_compare (result_target, cond,
6501 /* If the results are supposed to be either -1 or 0 we are done
6502 since this is what our compare instructions generate anyway. */
6503 if (all_ones_operand (then, GET_MODE (then))
6504 && const0_operand (els, GET_MODE (els)))
6506 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6511 /* Otherwise we will do a vsel afterwards. */
6512 /* This gets triggered e.g.
6513 with gcc.c-torture/compile/pr53410-1.c */
6515 then = force_reg (target_mode, then);
6518 els = force_reg (target_mode, els);
6520 tmp = gen_rtx_fmt_ee (EQ, VOIDmode, result_target,
6522 CONST0_RTX (result_mode));
6524 /* We compared the result against zero above so we have to swap then and els here. */
6526 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6528 gcc_assert (target_mode == GET_MODE (then));
6529 emit_insn (gen_rtx_SET (target, tmp));
6532 /* Emit the RTX necessary to initialize the vector TARGET with values
6535 s390_expand_vec_init (rtx target, rtx vals)
6537 machine_mode mode = GET_MODE (target);
6538 machine_mode inner_mode = GET_MODE_INNER (mode);
6539 int n_elts = GET_MODE_NUNITS (mode);
6540 bool all_same = true, all_regs = true, all_const_int = true;
6544 for (i = 0; i < n_elts; ++i)
6546 x = XVECEXP (vals, 0, i);
6548 if (!CONST_INT_P (x))
6549 all_const_int = false;
6551 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6558 /* Use vector gen mask or vector gen byte mask if possible. */
6559 if (all_same && all_const_int
6560 && (XVECEXP (vals, 0, 0) == const0_rtx
6561 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6563 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6565 emit_insn (gen_rtx_SET (target,
6566 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6572 emit_insn (gen_rtx_SET (target,
6573 gen_rtx_VEC_DUPLICATE (mode,
6574 XVECEXP (vals, 0, 0))));
6581 && GET_MODE_SIZE (inner_mode) == 8)
6583 /* Use vector load pair. */
6584 emit_insn (gen_rtx_SET (target,
6585 gen_rtx_VEC_CONCAT (mode,
6586 XVECEXP (vals, 0, 0),
6587 XVECEXP (vals, 0, 1))));
6591 /* Use vector load logical element and zero. */
6592 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6596 x = XVECEXP (vals, 0, 0);
6597 if (memory_operand (x, inner_mode))
6599 for (i = 1; i < n_elts; ++i)
6600 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6604 machine_mode half_mode = (inner_mode == SFmode
6605 ? V2SFmode : V2SImode);
6606 emit_insn (gen_rtx_SET (target,
6607 gen_rtx_VEC_CONCAT (mode,
6608 gen_rtx_VEC_CONCAT (half_mode,
6611 gen_rtx_VEC_CONCAT (half_mode,
6619 /* We are about to set the vector elements one by one. Zero out the
6620 full register first in order to help the data flow framework to
6621 detect it as a full VR set. */
6622 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6624 /* Unfortunately the vec_init expander is not allowed to fail. So
6625 we have to implement the fallback ourselves. */
6626 for (i = 0; i < n_elts; i++)
6628 rtx elem = XVECEXP (vals, 0, i);
6629 if (!general_operand (elem, GET_MODE (elem)))
6630 elem = force_reg (inner_mode, elem);
6632 emit_insn (gen_rtx_SET (target,
6633 gen_rtx_UNSPEC (mode, gen_rtvec (3, elem,
6635 GEN_INT (i), target), UNSPEC_VEC_SET)));
6640 /* Structure to hold the initial parameters for a compare_and_swap operation
6641 in HImode and QImode. */
6643 struct alignment_context
6645 rtx memsi; /* SI aligned memory location. */
6646 rtx shift; /* Bit offset with regard to lsb. */
6647 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6648 rtx modemaski; /* ~modemask */
6649 bool aligned; /* True if memory is aligned, false otherwise. */
6652 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6653 structure AC for transparent simplifying, if the memory alignment is known
6654 to be at least 32 bit. MEM is the memory location for the actual operation
6655 and MODE its mode. */
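/* A worked example (big-endian, GET_MODE_SIZE (SImode) == 4): for a
   HImode access at address 0x1002, ac->shift starts as 4 - 2 == 2
   bytes, byteoffset == 0x1002 & 3 == 2, so the remaining shift is
   0 bits and the halfword occupies the low-order bytes of the
   SImode word at 0x1000.  At address 0x1000 the shift would be
   2 bytes == 16 bits instead.  */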
6658 init_alignment_context (struct alignment_context *ac, rtx mem,
6661 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6662 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6665 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6668 /* Alignment is unknown. */
6669 rtx byteoffset, addr, align;
6671 /* Force the address into a register. */
6672 addr = force_reg (Pmode, XEXP (mem, 0));
6674 /* Align it to SImode. */
6675 align = expand_simple_binop (Pmode, AND, addr,
6676 GEN_INT (-GET_MODE_SIZE (SImode)),
6677 NULL_RTX, 1, OPTAB_DIRECT);
6679 ac->memsi = gen_rtx_MEM (SImode, align);
6680 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6681 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6682 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6684 /* Calculate shiftcount. */
6685 byteoffset = expand_simple_binop (Pmode, AND, addr,
6686 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6687 NULL_RTX, 1, OPTAB_DIRECT);
6688 /* As we already have some offset, evaluate the remaining distance. */
6689 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6690 NULL_RTX, 1, OPTAB_DIRECT);
6693 /* Shift is the byte count, but we need the bitcount. */
6694 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6695 NULL_RTX, 1, OPTAB_DIRECT);
6697 /* Calculate masks. */
6698 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6699 GEN_INT (GET_MODE_MASK (mode)),
6700 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6701 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6705 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6706 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6707 perform the merge in SEQ2. */
6710 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6711 machine_mode mode, rtx val, rtx ins)
6718 tmp = copy_to_mode_reg (SImode, val);
6719 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)), ac->shift, ins))
6723 *seq2 = get_insns ();
6730 /* Failed to use insv. Generate a two part shift and mask. */
6732 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6733 *seq1 = get_insns ();
6737 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6738 *seq2 = get_insns ();
6744 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6745 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6746 value to set if CMP == MEM. */
6749 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6750 rtx cmp, rtx new_rtx, bool is_weak)
6752 struct alignment_context ac;
6753 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6754 rtx res = gen_reg_rtx (SImode);
6755 rtx_code_label *csloop = NULL, *csend = NULL;
6757 gcc_assert (MEM_P (mem));
6759 init_alignment_context (&ac, mem, mode);
6761 /* Load full word. Subsequent loads are performed by CS. */
6762 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6763 NULL_RTX, 1, OPTAB_DIRECT);
6765 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6766 possible, we try to use insv to make this happen efficiently. If
6767 that fails we'll generate code both inside and outside the loop. */
6768 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6769 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6776 /* Start CS loop. */
6779 /* Begin assuming success. */
6780 emit_move_insn (btarget, const1_rtx);
6782 csloop = gen_label_rtx ();
6783 csend = gen_label_rtx ();
6784 emit_label (csloop);
6787 /* val = "<mem>00..0<mem>"
6788 * cmp = "00..0<cmp>00..0"
6789 * new = "00..0<new>00..0"
6795 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6797 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6802 /* Jump to end if we're done (likely?). */
6803 s390_emit_jump (csend, cc);
6805 /* Check for changes outside mode, and loop internal if so.
6806 Arrange the moves so that the compare is adjacent to the
6807 branch so that we can generate CRJ. */
6808 tmp = copy_to_reg (val);
6809 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6811 cc = s390_emit_compare (NE, val, tmp);
6812 s390_emit_jump (csloop, cc);
6815 emit_move_insn (btarget, const0_rtx);
6819 /* Return the correct part of the bitfield. */
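/* E.g., with ac.shift == 16 the HImode result occupies bits 16..31
   of RES (counting from the lsb), so the logical shift right by
   ac.shift moves it into the low 16 bits before the narrowing
   convert_move.  */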
6820 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6821 NULL_RTX, 1, OPTAB_DIRECT), 1);
6824 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6826 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6827 rtx cmp, rtx new_rtx, bool is_weak)
6829 rtx output = vtarget;
6830 rtx_code_label *skip_cs_label = NULL;
6831 bool do_const_opt = false;
6833 if (!register_operand (output, mode))
6834 output = gen_reg_rtx (mode);
6836 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6837 with the constant first and skip the compare_and_swap because it's very
6838 expensive and likely to fail anyway.
6839 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6840 cause spurious failures in that case.
6841 Note 2: It may be useful to do this also for non-constant INPUT.
6842 Note 3: Currently only targets with "load on condition" are supported
6843 (z196 and newer). */
6846 && (mode == SImode || mode == DImode))
6847 do_const_opt = (is_weak && CONST_INT_P (cmp));
6851 const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
6852 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6854 skip_cs_label = gen_label_rtx ();
6855 emit_move_insn (btarget, const0_rtx);
6856 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
6858 rtvec lt = rtvec_alloc (2);
6860 /* Load-and-test + conditional jump. */
6862 RTVEC_ELT (lt, 0) = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
6863 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
6864 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
6868 emit_move_insn (output, mem);
6869 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
6871 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
6872 add_int_reg_note (get_last_insn (), REG_BR_PROB, very_unlikely);
6873 /* If the jump is not taken, OUTPUT is the expected value. */
6875 /* Reload newval to a register manually, *after* the compare and jump
6876 above. Otherwise Reload might place it before the jump. */
6879 cmp = force_reg (mode, cmp);
6880 new_rtx = force_reg (mode, new_rtx);
6881 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
6882 (do_const_opt) ? CCZmode : CCZ1mode);
6883 if (skip_cs_label != NULL)
6884 emit_label (skip_cs_label);
6886 /* We deliberately accept non-register operands in the predicate
6887 to ensure the write back to the output operand happens *before*
6888 the store-flags code below. This makes it easier for combine
6889 to merge the store-flags code with a potential test-and-branch
6890 pattern following (immediately!) afterwards. */
6891 if (output != vtarget)
6892 emit_move_insn (vtarget, output);
6898 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
6899 btarget has already been initialized with 0 above. */
6900 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6901 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
6902 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
6903 emit_insn (gen_rtx_SET (btarget, ite));
6909 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
6910 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
6911 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
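/* Illustrative sketch (not part of the backend): the do_const_opt path
   above fires for a weak compare-exchange whose expected value is a
   constant, as in this hypothetical try-lock (the builtin is real):

     static _Bool
     try_lock (int *lock)
     {
       int expected = 0;
       return __atomic_compare_exchange_n (lock, &expected, 1, 1,
                                           __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
     }

   Since expected == 0, the expander can emit a load-and-test plus a
   conditional jump around the expensive CS instruction.  */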
6915 /* Expand an atomic compare and swap operation. MEM is the memory location,
6916 CMP the old value to compare MEM with and NEW_RTX the value to set if
CMP == MEM. */
6920 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6921 rtx cmp, rtx new_rtx, bool is_weak)
6928 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
6932 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
6939 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
6940 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
of MEM. */
6944 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
6946 machine_mode mode = GET_MODE (mem);
6947 rtx_code_label *csloop;
if (TARGET_Z196
6950 && (mode == DImode || mode == SImode)
6951 && CONST_INT_P (input) && INTVAL (input) == 0)
6953 emit_move_insn (output, const0_rtx);
if (mode == DImode)
6955 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
else
6957 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
return;
6961 input = force_reg (mode, input);
6962 emit_move_insn (output, mem);
6963 csloop = gen_label_rtx ();
6964 emit_label (csloop);
6965 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
input, CCZ1mode));
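/* Illustrative sketch (not part of the backend): under the target guard
   above, exchanging in the constant 0, as in the hypothetical function
   below, is emitted as an atomic fetch-and-AND with zero rather than a
   CS loop; any other input falls through to the loop that follows.

     static int
     xchg_zero (int *p)
     {
       return __atomic_exchange_n (p, 0, __ATOMIC_SEQ_CST);
     }
*/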
6969 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6970 and VAL the value to play with. If AFTER is true then store the value
6971 MEM holds after the operation, if AFTER is false then store the value MEM
6972 holds before the operation. If TARGET is zero then discard that value, else
6973 store it to TARGET. */
6976 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6977 rtx target, rtx mem, rtx val, bool after)
6979 struct alignment_context ac;
6981 rtx new_rtx = gen_reg_rtx (SImode);
6982 rtx orig = gen_reg_rtx (SImode);
6983 rtx_code_label *csloop = gen_label_rtx ();
6985 gcc_assert (!target || register_operand (target, VOIDmode));
6986 gcc_assert (MEM_P (mem));
6988 init_alignment_context (&ac, mem, mode);
6990 /* Shift val to the correct bit positions.
6991 Preserve "icm", but prevent "ex icm". */
6992 if (!(ac.aligned && code == SET && MEM_P (val)))
6993 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6995 /* Further preparation insns. */
6996 if (code == PLUS || code == MINUS)
6997 emit_move_insn (orig, val);
6998 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6999 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7000 NULL_RTX, 1, OPTAB_DIRECT);
7002 /* Load full word. Subsequent loads are performed by CS. */
7003 cmp = force_reg (SImode, ac.memsi);
7005 /* Start CS loop. */
7006 emit_label (csloop);
7007 emit_move_insn (new_rtx, cmp);
7009 /* Patch new with val at correct position. */
switch (code)
  {
  case PLUS:
  case MINUS:
7014 val = expand_simple_binop (SImode, code, new_rtx, orig,
7015 NULL_RTX, 1, OPTAB_DIRECT);
7016 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7017 NULL_RTX, 1, OPTAB_DIRECT);
  /* FALLTHRU */
  case SET:
7020 if (ac.aligned && MEM_P (val))
7021 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7022 0, 0, SImode, val, false);
  else
    {
7025 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7026 NULL_RTX, 1, OPTAB_DIRECT);
7027 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7028 NULL_RTX, 1, OPTAB_DIRECT);
    }
  break;
  case AND:
  case IOR:
  case XOR:
7034 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7035 NULL_RTX, 1, OPTAB_DIRECT);
  break;
7037 case MULT: /* NAND */
7038 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7039 NULL_RTX, 1, OPTAB_DIRECT);
7040 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7041 NULL_RTX, 1, OPTAB_DIRECT);
  break;
  default:
    gcc_unreachable ();
  }
7047 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7048 ac.memsi, cmp, new_rtx,
7051 /* Return the correct part of the bitfield. */
if (target)
7053 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7054 after ? new_rtx : cmp, ac.shift,
7055 NULL_RTX, 1, OPTAB_DIRECT), 1);
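/* Illustrative sketch (not part of the backend): a sub-word atomic
   read-modify-write such as the hypothetical function below is expanded
   by the CS loop above, with the byte masked and shifted into its word.
   Note that the MULT rtx code is reused in this expander to encode NAND.

     static unsigned char
     fetch_or_u8 (unsigned char *p, unsigned char bits)
     {
       return __atomic_fetch_or (p, bits, __ATOMIC_SEQ_CST);
     }
*/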
7058 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7059 We need to emit DTP-relative relocations. */
7061 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7064 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
if (size == 4)
7069 fputs ("\t.long\t", file);
else if (size == 8)
7072 fputs ("\t.quad\t", file);
else
gcc_unreachable ();
7077 output_addr_const (file, x);
7078 fputs ("@DTPOFF", file);
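/* For a thread-local variable tls_var (a placeholder name), the hook
   above emits on 64-bit targets, e.g.:

	.quad	tls_var@DTPOFF

   and the .long form on 31-bit targets.  */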
7081 /* Return the proper mode for REGNO being represented in the dwarf
unwind info. */
7084 s390_dwarf_frame_reg_mode (int regno)
7086 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7088 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7089 if (GENERAL_REGNO_P (regno))
save_mode = Pmode;

7092 /* The rightmost 64 bits of vector registers are call-clobbered. */
7093 if (GET_MODE_SIZE (save_mode) > 8)
save_mode = DImode;

return save_mode;
7099 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7100 /* Implement TARGET_MANGLE_TYPE. */
7103 s390_mangle_type (const_tree type)
7105 type = TYPE_MAIN_VARIANT (type);
7107 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7108 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
return NULL;
7111 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7112 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7113 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7114 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7116 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7117 && TARGET_LONG_DOUBLE_128)
return "g";

7120 /* For all other types, use normal C++ mangling. */
return NULL;
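/* Illustrative sketch: the strings above keep C++ overloads on the vector
   bool types distinct.  For a hypothetical declaration

     void f (__vector __bool int);

   the parameter type (BT_BV4SI here) is mangled as the "U6__booli" string
   returned above instead of the default mangling.  */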
7125 /* In the name of slightly smaller debug output, and to cater to
7126 general assembler lossage, recognize various UNSPEC sequences
7127 and turn them back into a direct symbol reference. */
7130 s390_delegitimize_address (rtx orig_x)
7134 orig_x = delegitimize_mem_from_attrs (orig_x);
7137 /* Extract the symbol ref from:
7138 (plus:SI (reg:SI 12 %r12)
7139 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7140 UNSPEC_GOTOFF/PLTOFF)))
7142 (plus:SI (reg:SI 12 %r12)
7143 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7144 UNSPEC_GOTOFF/PLTOFF)
7145 (const_int 4 [0x4])))) */
7146 if (GET_CODE (x) == PLUS
7147 && REG_P (XEXP (x, 0))
7148 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7149 && GET_CODE (XEXP (x, 1)) == CONST)
7151 HOST_WIDE_INT offset = 0;
7153 /* The const operand. */
7154 y = XEXP (XEXP (x, 1), 0);
7156 if (GET_CODE (y) == PLUS
7157 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7159 offset = INTVAL (XEXP (y, 1));
7163 if (GET_CODE (y) == UNSPEC
7164 && (XINT (y, 1) == UNSPEC_GOTOFF
7165 || XINT (y, 1) == UNSPEC_PLTOFF))
7166 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7169 if (GET_CODE (x) != MEM)
7173 if (GET_CODE (x) == PLUS
7174 && GET_CODE (XEXP (x, 1)) == CONST
7175 && GET_CODE (XEXP (x, 0)) == REG
7176 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7178 y = XEXP (XEXP (x, 1), 0);
7179 if (GET_CODE (y) == UNSPEC
7180 && XINT (y, 1) == UNSPEC_GOT)
7181 y = XVECEXP (y, 0, 0);
7185 else if (GET_CODE (x) == CONST)
7187 /* Extract the symbol ref from:
7188 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7189 UNSPEC_PLT/GOTENT))) */
7192 if (GET_CODE (y) == UNSPEC
7193 && (XINT (y, 1) == UNSPEC_GOTENT
7194 || XINT (y, 1) == UNSPEC_PLT))
7195 y = XVECEXP (y, 0, 0);
7202 if (GET_MODE (orig_x) != Pmode)
7204 if (GET_MODE (orig_x) == BLKmode)
7206 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7213 /* Output operand OP to stdio stream FILE.
7214 OP is an address (register + offset) which is not used to address data;
7215 instead the rightmost bits are interpreted as the value. */
7218 print_addrstyle_operand (FILE *file, rtx op)
7220 HOST_WIDE_INT offset;
7223 /* Extract base register and offset. */
7224 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7230 gcc_assert (GET_CODE (base) == REG);
7231 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7232 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7235 /* Offsets are restricted to twelve bits. */
7236 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7238 fprintf (file, "(%s)", reg_names[REGNO (base)]);
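/* Example output (illustrative): for OP = (plus (reg %r3) (const_int 2))
   this prints "2(%r3)", and for a plain (const_int 6) just "6", the
   forms used for shift counts as in "sll %r2,2(%r3)".  */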
7241 /* Assigns the number of NOP halfwords to be emitted before and after the
7242 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7243 If hotpatching is disabled for the function, the values are set to zero. */
7247 s390_function_num_hotpatch_hw (tree decl,
7253 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7255 /* Handle the arguments of the hotpatch attribute. The values
7256 specified via attribute might override the cmdline argument
7260 tree args = TREE_VALUE (attr);
7262 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7263 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7267 /* Use the values specified by the cmdline arguments. */
7268 *hw_before = s390_hotpatch_hw_before_label;
7269 *hw_after = s390_hotpatch_hw_after_label;
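/* Illustrative usage (not part of the backend): the attribute overrides
   the -mhotpatch= command line option, e.g. for a hypothetical function

     void critical (void) __attribute__ ((hotpatch (1, 2)));

   *hw_before becomes 1 and *hw_after becomes 2.  */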
7273 /* Write the current .machine and .machinemode specification to the assembler
file. */
7276 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7278 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7280 fprintf (asm_out_file, "\t.machinemode %s\n",
7281 (TARGET_ZARCH) ? "zarch" : "esa");
7282 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7283 if (S390_USE_ARCHITECTURE_MODIFIERS)
7287 cpu_flags = processor_flags_table[(int) s390_arch];
7288 if (TARGET_HTM && !(cpu_flags & PF_TX))
7289 fprintf (asm_out_file, "+htm");
7290 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7291 fprintf (asm_out_file, "+nohtm");
7292 if (TARGET_VX && !(cpu_flags & PF_VX))
7293 fprintf (asm_out_file, "+vx");
7294 else if (!TARGET_VX && (cpu_flags & PF_VX))
7295 fprintf (asm_out_file, "+novx");
7297 fprintf (asm_out_file, "\"\n");
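/* Illustrative output of the code above for -march=z13 -mno-htm
   (assuming the assembler supports these directives):

	.machinemode zarch
	.machine "z13+nohtm"
*/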
7300 /* Write an extra function header before the very start of the function. */
7303 s390_asm_output_function_prefix (FILE *asm_out_file,
7304 const char *fnname ATTRIBUTE_UNUSED)
7306 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
return;
7308 /* Since only the function specific options are saved but not the indications
7309 which options are set, it's too much work here to figure out which options
7310 have actually changed. Thus, generate .machine and .machinemode whenever a
7311 function has the target attribute or pragma. */
7312 fprintf (asm_out_file, "\t.machinemode push\n");
7313 fprintf (asm_out_file, "\t.machine push\n");
7314 s390_asm_output_machine_for_arch (asm_out_file);
7317 /* Write an extra function footer after the very end of the function. */
7320 s390_asm_declare_function_size (FILE *asm_out_file,
7321 const char *fnname, tree decl)
7323 if (!flag_inhibit_size_directive)
7324 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7325 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
return;
7327 fprintf (asm_out_file, "\t.machine pop\n");
7328 fprintf (asm_out_file, "\t.machinemode pop\n");
7332 /* Write the extra assembler code needed to declare a function properly. */
7335 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7338 int hw_before, hw_after;
7340 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7343 unsigned int function_alignment;
7346 /* Add a trampoline code area before the function label and initialize it
7347 with two-byte nop instructions. This area can be overwritten with code
7348 that jumps to a patched version of the function. */
7349 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7350 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
hw_before);
7352 for (i = 1; i < hw_before; i++)
7353 fputs ("\tnopr\t%r0\n", asm_out_file);
7355 /* Note: The function label must be aligned so that (a) the bytes of the
7356 following nop do not cross a cacheline boundary, and (b) a jump address
7357 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7358 stored directly before the label without crossing a cacheline
7359 boundary. All this is necessary to make sure the trampoline code can
7360 be changed atomically.
7361 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7362 if there are NOPs before the function label, the alignment is placed
7363 before them. So it is necessary to duplicate the alignment after the
NOPs. */
7365 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7366 if (! DECL_USER_ALIGN (decl))
7367 function_alignment = MAX (function_alignment,
7368 (unsigned int) align_functions);
7369 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7370 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7373 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7375 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7376 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7377 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7378 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7379 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7380 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7381 s390_warn_framesize);
7382 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7383 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7384 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7385 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7386 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7387 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7388 TARGET_PACKED_STACK);
7389 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7390 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7391 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7392 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7393 s390_warn_dynamicstack_p);
7395 ASM_OUTPUT_LABEL (asm_out_file, fname);
7397 asm_fprintf (asm_out_file,
7398 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
hw_after);
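/* Illustrative assembly for hotpatch (2, 2) on a hypothetical function
   foo; the exact post-label filler insns depend on hw_after:

	nopr	%r0	# pre-label NOPs for hotpatch (2 halfwords)
	nopr	%r0
	# alignment for hotpatch
	.align	8
foo:
	# post-label NOPs for hotpatch (2 halfwords)
	...
*/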
7402 /* Output machine-dependent UNSPECs occurring in address constant X
7403 in assembler syntax to stdio stream FILE. Returns true if the
7404 constant X could be recognized, false otherwise. */
7407 s390_output_addr_const_extra (FILE *file, rtx x)
7409 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7410 switch (XINT (x, 1))
7413 output_addr_const (file, XVECEXP (x, 0, 0));
7414 fprintf (file, "@GOTENT");
7417 output_addr_const (file, XVECEXP (x, 0, 0));
7418 fprintf (file, "@GOT");
7421 output_addr_const (file, XVECEXP (x, 0, 0));
7422 fprintf (file, "@GOTOFF");
7425 output_addr_const (file, XVECEXP (x, 0, 0));
7426 fprintf (file, "@PLT");
7429 output_addr_const (file, XVECEXP (x, 0, 0));
7430 fprintf (file, "@PLTOFF");
7433 output_addr_const (file, XVECEXP (x, 0, 0));
7434 fprintf (file, "@TLSGD");
7437 assemble_name (file, get_some_local_dynamic_name ());
7438 fprintf (file, "@TLSLDM");
7441 output_addr_const (file, XVECEXP (x, 0, 0));
7442 fprintf (file, "@DTPOFF");
7445 output_addr_const (file, XVECEXP (x, 0, 0));
7446 fprintf (file, "@NTPOFF");
7448 case UNSPEC_GOTNTPOFF:
7449 output_addr_const (file, XVECEXP (x, 0, 0));
7450 fprintf (file, "@GOTNTPOFF");
7452 case UNSPEC_INDNTPOFF:
7453 output_addr_const (file, XVECEXP (x, 0, 0));
7454 fprintf (file, "@INDNTPOFF");
7458 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7459 switch (XINT (x, 1))
7461 case UNSPEC_POOL_OFFSET:
7462 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7463 output_addr_const (file, x);
7469 /* Output address operand ADDR in assembler syntax to
7470 stdio stream FILE. */
7473 print_operand_address (FILE *file, rtx addr)
7475 struct s390_address ad;
7476 memset (&ad, 0, sizeof (s390_address));
7478 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7482 output_operand_lossage ("symbolic memory references are "
7483 "only supported on z10 or later");
7486 output_addr_const (file, addr);
7490 if (!s390_decompose_address (addr, &ad)
7491 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7492 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7493 output_operand_lossage ("cannot decompose address");
7496 output_addr_const (file, ad.disp);
7498 fprintf (file, "0");
7500 if (ad.base && ad.indx)
7501 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7502 reg_names[REGNO (ad.base)]);
7504 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
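/* Example output (illustrative): base %r15, index %r2, displacement 8
   prints as "8(%r2,%r15)"; without an index, "8(%r15)"; with neither
   base nor displacement, just "0".  */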
7507 /* Output operand X in assembler syntax to stdio stream FILE.
7508 CODE specified the format flag. The following format flags
7511 'C': print opcode suffix for branch condition.
7512 'D': print opcode suffix for inverse branch condition.
7513 'E': print opcode suffix for branch on index instruction.
7514 'G': print the size of the operand in bytes.
7515 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7516 'M': print the second word of a TImode operand.
7517 'N': print the second word of a DImode operand.
7518 'O': print only the displacement of a memory reference or address.
7519 'R': print only the base register of a memory reference or address.
7520 'S': print S-type memory reference (base+displacement).
7521 'Y': print address style operand without index (e.g. shift count or setmem
operand).
7524 'b': print integer X as if it's an unsigned byte.
7525 'c': print integer X as if it's a signed byte.
7526 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7527 'f': "end" of contiguous bitmask X in SImode.
7528 'h': print integer X as if it's a signed halfword.
7529 'i': print the first nonzero HImode part of X.
7530 'j': print the first HImode part unequal to -1 of X.
7531 'k': print the first nonzero SImode part of X.
7532 'm': print the first SImode part unequal to -1 of X.
7533 'o': print integer X as if it's an unsigned 32-bit word.
7534 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7535 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7536 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7537 'x': print integer X as if it's an unsigned halfword.
7538 'v': print register number as vector register (v1 instead of f1).
*/
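/* Worked examples for two of the modifiers above (illustrative): for
   X = (const_int 0xfff0), "%x0" prints 65520 (unsigned halfword view)
   while "%h0" prints -16 (sign-extended halfword); for
   X = (const_int 0x1234) both print 4660.  */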
7542 print_operand (FILE *file, rtx x, int code)
7549 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7553 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7557 if (GET_CODE (x) == LE)
7558 fprintf (file, "l");
7559 else if (GET_CODE (x) == GT)
7560 fprintf (file, "h");
7562 output_operand_lossage ("invalid comparison operator "
7563 "for 'E' output modifier");
7567 if (GET_CODE (x) == SYMBOL_REF)
7569 fprintf (file, "%s", ":tls_load:");
7570 output_addr_const (file, x);
7572 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7574 fprintf (file, "%s", ":tls_gdcall:");
7575 output_addr_const (file, XVECEXP (x, 0, 0));
7577 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7579 fprintf (file, "%s", ":tls_ldcall:");
7580 const char *name = get_some_local_dynamic_name ();
7582 assemble_name (file, name);
7585 output_operand_lossage ("invalid reference for 'J' output modifier");
7589 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7594 struct s390_address ad;
7597 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
7600 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
|| ad.indx)
7603 output_operand_lossage ("invalid address for 'O' output modifier");
7608 output_addr_const (file, ad.disp);
7610 fprintf (file, "0");
7616 struct s390_address ad;
7619 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
7622 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
|| ad.indx)
7625 output_operand_lossage ("invalid address for 'R' output modifier");
7630 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7632 fprintf (file, "0");
7638 struct s390_address ad;
7643 output_operand_lossage ("memory reference expected for "
7644 "'S' output modifier");
7647 ret = s390_decompose_address (XEXP (x, 0), &ad);
if (!ret
7650 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
|| ad.indx)
7653 output_operand_lossage ("invalid address for 'S' output modifier");
7658 output_addr_const (file, ad.disp);
7660 fprintf (file, "0");
7663 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7668 if (GET_CODE (x) == REG)
7669 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7670 else if (GET_CODE (x) == MEM)
7671 x = change_address (x, VOIDmode,
7672 plus_constant (Pmode, XEXP (x, 0), 4));
7674 output_operand_lossage ("register or memory expression expected "
7675 "for 'N' output modifier");
7679 if (GET_CODE (x) == REG)
7680 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7681 else if (GET_CODE (x) == MEM)
7682 x = change_address (x, VOIDmode,
7683 plus_constant (Pmode, XEXP (x, 0), 8));
7685 output_operand_lossage ("register or memory expression expected "
7686 "for 'M' output modifier");
7690 print_addrstyle_operand (file, x);
7694 switch (GET_CODE (x))
7697 /* Print FP regs as fx instead of vx when they are accessed
7698 through non-vector mode. */
7700 || VECTOR_NOFP_REG_P (x)
7701 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7702 || (VECTOR_REG_P (x)
7703 && (GET_MODE_SIZE (GET_MODE (x)) /
7704 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7705 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7707 fprintf (file, "%s", reg_names[REGNO (x)]);
7711 output_address (GET_MODE (x), XEXP (x, 0));
7718 output_addr_const (file, x);
7731 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7737 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7740 ival = s390_extract_part (x, HImode, 0);
7743 ival = s390_extract_part (x, HImode, -1);
7746 ival = s390_extract_part (x, SImode, 0);
7749 ival = s390_extract_part (x, SImode, -1);
7761 len = (code == 's' || code == 'e' ? 64 : 32);
7762 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7764 if (code == 's' || code == 't')
7771 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7773 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7776 case CONST_WIDE_INT:
7778 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7779 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7780 else if (code == 'x')
7781 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7782 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7783 else if (code == 'h')
7784 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7785 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7789 output_operand_lossage ("invalid constant - try using "
7790 "an output modifier");
7792 output_operand_lossage ("invalid constant for output modifier '%c'",
7800 gcc_assert (const_vec_duplicate_p (x));
7801 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7802 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7810 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7812 ival = (code == 's') ? start : end;
7813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7819 bool ok = s390_bytemask_vector_p (x, &mask);
7821 fprintf (file, "%u", mask);
7826 output_operand_lossage ("invalid constant vector for output "
7827 "modifier '%c'", code);
7833 output_operand_lossage ("invalid expression - try using "
7834 "an output modifier");
7836 output_operand_lossage ("invalid expression for output "
7837 "modifier '%c'", code);
7842 /* Target hook for assembling integer objects. We need to define it
7843 here to work around a bug in some versions of GAS, which couldn't
7844 handle values smaller than INT_MIN when printed in decimal. */
7847 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7849 if (size == 8 && aligned_p
7850 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7852 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
INTVAL (x));
return true;
7856 return default_assemble_integer (x, size, aligned_p);
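/* Example (illustrative): the value -9223372036854775808 is smaller than
   INT_MIN, so instead of the decimal form that trips the buggy GAS
   versions this hook emits

	.quad	0x8000000000000000
*/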
7859 /* Returns true if register REGNO is used for forming
7860 a memory address in expression X. */
7863 reg_used_in_mem_p (int regno, rtx x)
7865 enum rtx_code code = GET_CODE (x);
7871 if (refers_to_regno_p (regno, XEXP (x, 0)))
7874 else if (code == SET
7875 && GET_CODE (SET_DEST (x)) == PC)
7877 if (refers_to_regno_p (regno, SET_SRC (x)))
7881 fmt = GET_RTX_FORMAT (code);
7882 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7885 && reg_used_in_mem_p (regno, XEXP (x, i)))
7888 else if (fmt[i] == 'E')
7889 for (j = 0; j < XVECLEN (x, i); j++)
7890 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7896 /* Returns true if expression DEP_RTX sets an address register
7897 used by instruction INSN to address memory. */
7900 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7904 if (NONJUMP_INSN_P (dep_rtx))
7905 dep_rtx = PATTERN (dep_rtx);
7907 if (GET_CODE (dep_rtx) == SET)
7909 target = SET_DEST (dep_rtx);
7910 if (GET_CODE (target) == STRICT_LOW_PART)
7911 target = XEXP (target, 0);
7912 while (GET_CODE (target) == SUBREG)
7913 target = SUBREG_REG (target);
7915 if (GET_CODE (target) == REG)
7917 int regno = REGNO (target);
7919 if (s390_safe_attr_type (insn) == TYPE_LA)
7921 pat = PATTERN (insn);
7922 if (GET_CODE (pat) == PARALLEL)
7924 gcc_assert (XVECLEN (pat, 0) == 2);
7925 pat = XVECEXP (pat, 0, 0);
7927 gcc_assert (GET_CODE (pat) == SET);
7928 return refers_to_regno_p (regno, SET_SRC (pat));
7930 else if (get_attr_atype (insn) == ATYPE_AGEN)
7931 return reg_used_in_mem_p (regno, PATTERN (insn));
7937 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
7940 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7942 rtx dep_rtx = PATTERN (dep_insn);
7945 if (GET_CODE (dep_rtx) == SET
7946 && addr_generation_dependency_p (dep_rtx, insn))
7948 else if (GET_CODE (dep_rtx) == PARALLEL)
7950 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7952 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7960 /* A C statement (sans semicolon) to update the integer scheduling priority
7961 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7962 reduce the priority to execute INSN later. Do not define this macro if
7963 you do not need to adjust the scheduling priorities of insns.
7965 A STD instruction should be scheduled earlier,
7966 in order to use the bypass. */
7968 s390_adjust_priority (rtx_insn *insn, int priority)
7970 if (! INSN_P (insn))
return priority;
7973 if (s390_tune <= PROCESSOR_2064_Z900)
return priority;
7976 switch (s390_safe_attr_type (insn))
7980 priority = priority << 3;
7984 priority = priority << 1;
7993 /* The number of instructions that can be issued per cycle. */
7996 s390_issue_rate (void)
8000 case PROCESSOR_2084_Z990:
8001 case PROCESSOR_2094_Z9_109:
8002 case PROCESSOR_2094_Z9_EC:
8003 case PROCESSOR_2817_Z196:
return 3;
8005 case PROCESSOR_2097_Z10:
return 2;
8007 case PROCESSOR_9672_G5:
8008 case PROCESSOR_9672_G6:
8009 case PROCESSOR_2064_Z900:
8010 /* Starting with EC12 we use the sched_reorder hook to take care
8011 of instruction dispatch constraints. The algorithm only
8012 picks the best instruction and assumes only a single
8013 instruction gets issued per cycle. */
8014 case PROCESSOR_2827_ZEC12:
8015 case PROCESSOR_2964_Z13:
8016 case PROCESSOR_ARCH12:
default:
return 1;
8023 s390_first_cycle_multipass_dfa_lookahead (void)
8028 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8029 Fix up MEMs as required. */
8032 annotate_constant_pool_refs (rtx *x)
8037 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8038 || !CONSTANT_POOL_ADDRESS_P (*x));
8040 /* Literal pool references can only occur inside a MEM ... */
8041 if (GET_CODE (*x) == MEM)
8043 rtx memref = XEXP (*x, 0);
8045 if (GET_CODE (memref) == SYMBOL_REF
8046 && CONSTANT_POOL_ADDRESS_P (memref))
8048 rtx base = cfun->machine->base_reg;
8049 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8052 *x = replace_equiv_address (*x, addr);
8056 if (GET_CODE (memref) == CONST
8057 && GET_CODE (XEXP (memref, 0)) == PLUS
8058 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8059 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8060 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8062 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8063 rtx sym = XEXP (XEXP (memref, 0), 0);
8064 rtx base = cfun->machine->base_reg;
8065 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8068 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8073 /* ... or a load-address type pattern. */
8074 if (GET_CODE (*x) == SET)
8076 rtx addrref = SET_SRC (*x);
8078 if (GET_CODE (addrref) == SYMBOL_REF
8079 && CONSTANT_POOL_ADDRESS_P (addrref))
8081 rtx base = cfun->machine->base_reg;
8082 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8085 SET_SRC (*x) = addr;
8089 if (GET_CODE (addrref) == CONST
8090 && GET_CODE (XEXP (addrref, 0)) == PLUS
8091 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8092 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8093 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8095 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8096 rtx sym = XEXP (XEXP (addrref, 0), 0);
8097 rtx base = cfun->machine->base_reg;
8098 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8101 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8106 /* Annotate LTREL_BASE as well. */
8107 if (GET_CODE (*x) == UNSPEC
8108 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8110 rtx base = cfun->machine->base_reg;
8111 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8116 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8117 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8121 annotate_constant_pool_refs (&XEXP (*x, i));
8123 else if (fmt[i] == 'E')
8125 for (j = 0; j < XVECLEN (*x, i); j++)
8126 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8131 /* Split all branches that exceed the maximum distance.
8132 Returns true if this created a new literal pool entry. */
8135 s390_split_branches (void)
8137 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8138 int new_literal = 0, ret;
8143 /* We need correct insn addresses. */
8145 shorten_branches (get_insns ());
8147 /* Find all branches that exceed 64KB, and split them. */
8149 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8151 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8154 pat = PATTERN (insn);
8155 if (GET_CODE (pat) == PARALLEL)
8156 pat = XVECEXP (pat, 0, 0);
8157 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8160 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8162 label = &SET_SRC (pat);
8164 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8166 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8167 label = &XEXP (SET_SRC (pat), 1);
8168 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8169 label = &XEXP (SET_SRC (pat), 2);
8176 if (get_attr_length (insn) <= 4)
8179 /* We are going to use the return register as scratch register,
8180 make sure it will be saved/restored by the prologue/epilogue. */
8181 cfun_frame_layout.save_return_addr_p = 1;
8186 rtx mem = force_const_mem (Pmode, *label);
8187 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8189 INSN_ADDRESSES_NEW (set_insn, -1);
8190 annotate_constant_pool_refs (&PATTERN (set_insn));
8197 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8198 UNSPEC_LTREL_OFFSET);
8199 target = gen_rtx_CONST (Pmode, target);
8200 target = force_const_mem (Pmode, target);
8201 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8203 INSN_ADDRESSES_NEW (set_insn, -1);
8204 annotate_constant_pool_refs (&PATTERN (set_insn));
8206 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8207 cfun->machine->base_reg),
8209 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8212 ret = validate_change (insn, label, target, 0);
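/* Illustrative effect (registers and offsets are assumptions): a
   conditional branch whose target is farther than 64 KB, e.g.
   "brc 7,.Lfar", is retargeted through the return register, roughly

	l	%r14,.LC0-.Lpool(%r13)	# target address from literal pool
	bcr	7,%r14

   which is why save_return_addr_p is forced above.  */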
8220 /* Find an annotated literal pool symbol referenced in RTX X,
8221 and store it at REF. Will abort if X contains references to
8222 more than one such pool symbol; multiple references to the same
8223 symbol are allowed, however.
8225 The rtx pointed to by REF must be initialized to NULL_RTX
8226 by the caller before calling this routine. */
8229 find_constant_pool_ref (rtx x, rtx *ref)
8234 /* Ignore LTREL_BASE references. */
8235 if (GET_CODE (x) == UNSPEC
8236 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8238 /* Likewise POOL_ENTRY insns. */
8239 if (GET_CODE (x) == UNSPEC_VOLATILE
8240 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8243 gcc_assert (GET_CODE (x) != SYMBOL_REF
8244 || !CONSTANT_POOL_ADDRESS_P (x));
8246 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8248 rtx sym = XVECEXP (x, 0, 0);
8249 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8250 && CONSTANT_POOL_ADDRESS_P (sym));
8252 if (*ref == NULL_RTX)
8255 gcc_assert (*ref == sym);
8260 fmt = GET_RTX_FORMAT (GET_CODE (x));
8261 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8265 find_constant_pool_ref (XEXP (x, i), ref);
8267 else if (fmt[i] == 'E')
8269 for (j = 0; j < XVECLEN (x, i); j++)
8270 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8275 /* Replace every reference to the annotated literal pool
8276 symbol REF in X by its base plus OFFSET. */
8279 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8284 gcc_assert (*x != ref);
8286 if (GET_CODE (*x) == UNSPEC
8287 && XINT (*x, 1) == UNSPEC_LTREF
8288 && XVECEXP (*x, 0, 0) == ref)
8290 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8294 if (GET_CODE (*x) == PLUS
8295 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8296 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8297 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8298 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8300 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8301 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8305 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8306 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8310 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8312 else if (fmt[i] == 'E')
8314 for (j = 0; j < XVECLEN (*x, i); j++)
8315 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8320 /* Check whether X contains an UNSPEC_LTREL_BASE.
8321 Return its constant pool symbol if found, NULL_RTX otherwise. */
8324 find_ltrel_base (rtx x)
8329 if (GET_CODE (x) == UNSPEC
8330 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8331 return XVECEXP (x, 0, 0);
8333 fmt = GET_RTX_FORMAT (GET_CODE (x));
8334 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8338 rtx fnd = find_ltrel_base (XEXP (x, i));
8342 else if (fmt[i] == 'E')
8344 for (j = 0; j < XVECLEN (x, i); j++)
8346 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8356 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8359 replace_ltrel_base (rtx *x)
8364 if (GET_CODE (*x) == UNSPEC
8365 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8367 *x = XVECEXP (*x, 0, 1);
8371 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8372 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8376 replace_ltrel_base (&XEXP (*x, i));
8378 else if (fmt[i] == 'E')
8380 for (j = 0; j < XVECLEN (*x, i); j++)
8381 replace_ltrel_base (&XVECEXP (*x, i, j));
8387 /* We keep a list of constants which we have to add to internal
8388 constant tables in the middle of large functions. */
8390 #define NR_C_MODES 32
8391 machine_mode constant_modes[NR_C_MODES] =
8393 TFmode, TImode, TDmode,
8394 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8395 V4SFmode, V2DFmode, V1TFmode,
8396 DFmode, DImode, DDmode,
8397 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8398 SFmode, SImode, SDmode,
8399 V4QImode, V2HImode, V1SImode, V1SFmode,
HImode, V2QImode, V1HImode,
QImode, V1QImode
};

struct constant
{
8408 struct constant *next;
rtx value;
8410 rtx_code_label *label;
};
8413 struct constant_pool
8415 struct constant_pool *next;
8416 rtx_insn *first_insn;
8417 rtx_insn *pool_insn;
8419 rtx_insn *emit_pool_after;
8421 struct constant *constants[NR_C_MODES];
8422 struct constant *execute;
8423 rtx_code_label *label;
8427 /* Allocate new constant_pool structure. */
8429 static struct constant_pool *
8430 s390_alloc_pool (void)
8432 struct constant_pool *pool;
8435 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8437 for (i = 0; i < NR_C_MODES; i++)
8438 pool->constants[i] = NULL;
8440 pool->execute = NULL;
8441 pool->label = gen_label_rtx ();
8442 pool->first_insn = NULL;
8443 pool->pool_insn = NULL;
8444 pool->insns = BITMAP_ALLOC (NULL);
8446 pool->emit_pool_after = NULL;
8451 /* Create new constant pool covering instructions starting at INSN
8452 and chain it to the end of POOL_LIST. */
8454 static struct constant_pool *
8455 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8457 struct constant_pool *pool, **prev;
8459 pool = s390_alloc_pool ();
8460 pool->first_insn = insn;
8462 for (prev = pool_list; *prev; prev = &(*prev)->next)
8469 /* End range of instructions covered by POOL at INSN and emit
8470 placeholder insn representing the pool. */
8473 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8475 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8478 insn = get_last_insn ();
8480 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8481 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8484 /* Add INSN to the list of insns covered by POOL. */
8487 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8489 bitmap_set_bit (pool->insns, INSN_UID (insn));
8492 /* Return pool out of POOL_LIST that covers INSN. */
8494 static struct constant_pool *
8495 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8497 struct constant_pool *pool;
8499 for (pool = pool_list; pool; pool = pool->next)
8500 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8506 /* Add constant VAL of mode MODE to the constant pool POOL. */
8509 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8514 for (i = 0; i < NR_C_MODES; i++)
8515 if (constant_modes[i] == mode)
8517 gcc_assert (i != NR_C_MODES);
8519 for (c = pool->constants[i]; c != NULL; c = c->next)
8520 if (rtx_equal_p (val, c->value))
8525 c = (struct constant *) xmalloc (sizeof *c);
8527 c->label = gen_label_rtx ();
8528 c->next = pool->constants[i];
8529 pool->constants[i] = c;
8530 pool->size += GET_MODE_SIZE (mode);
8534 /* Return an rtx that represents the offset of X from the start of
8538 s390_pool_offset (struct constant_pool *pool, rtx x)
8542 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8543 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8544 UNSPEC_POOL_OFFSET);
8545 return gen_rtx_CONST (GET_MODE (x), x);
8548 /* Find constant VAL of mode MODE in the constant pool POOL.
8549 Return an RTX describing the distance from the start of
8550 the pool to the location of the new constant. */
8553 s390_find_constant (struct constant_pool *pool, rtx val,
8559 for (i = 0; i < NR_C_MODES; i++)
8560 if (constant_modes[i] == mode)
8562 gcc_assert (i != NR_C_MODES);
8564 for (c = pool->constants[i]; c != NULL; c = c->next)
8565 if (rtx_equal_p (val, c->value))
8570 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8573 /* Check whether INSN is an execute. Return the label_ref to its
8574 execute target template if so, NULL_RTX otherwise. */
8577 s390_execute_label (rtx insn)
8579 if (NONJUMP_INSN_P (insn)
8580 && GET_CODE (PATTERN (insn)) == PARALLEL
8581 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8582 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8583 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8588 /* Add execute target for INSN to the constant pool POOL. */
8591 s390_add_execute (struct constant_pool *pool, rtx insn)
8595 for (c = pool->execute; c != NULL; c = c->next)
8596 if (INSN_UID (insn) == INSN_UID (c->value))
8601 c = (struct constant *) xmalloc (sizeof *c);
8603 c->label = gen_label_rtx ();
8604 c->next = pool->execute;
8610 /* Find execute target for INSN in the constant pool POOL.
8611 Return an RTX describing the distance from the start of
8612 the pool to the location of the execute target. */
8615 s390_find_execute (struct constant_pool *pool, rtx insn)
8619 for (c = pool->execute; c != NULL; c = c->next)
8620 if (INSN_UID (insn) == INSN_UID (c->value))
8625 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8628 /* For an execute INSN, extract the execute target template. */
8631 s390_execute_target (rtx insn)
8633 rtx pattern = PATTERN (insn);
8634 gcc_assert (s390_execute_label (insn));
8636 if (XVECLEN (pattern, 0) == 2)
8638 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8642 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8645 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8646 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8648 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8654 /* Indicate that INSN cannot be duplicated. This is the case for
8655 execute insns that carry a unique label. */
8658 s390_cannot_copy_insn_p (rtx_insn *insn)
8660 rtx label = s390_execute_label (insn);
8661 return label && label != const0_rtx;
8664 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8665 do not emit the pool base label. */
8668 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8671 rtx_insn *insn = pool->pool_insn;
8674 /* Switch to rodata section. */
8675 if (TARGET_CPU_ZARCH)
8677 insn = emit_insn_after (gen_pool_section_start (), insn);
8678 INSN_ADDRESSES_NEW (insn, -1);
8681 /* Ensure minimum pool alignment. */
8682 if (TARGET_CPU_ZARCH)
8683 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8685 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8686 INSN_ADDRESSES_NEW (insn, -1);
8688 /* Emit pool base label. */
8691 insn = emit_label_after (pool->label, insn);
8692 INSN_ADDRESSES_NEW (insn, -1);
8695 /* Dump constants in descending alignment requirement order,
8696 ensuring proper alignment for every constant. */
8697 for (i = 0; i < NR_C_MODES; i++)
8698 for (c = pool->constants[i]; c; c = c->next)
8700 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8701 rtx value = copy_rtx (c->value);
8702 if (GET_CODE (value) == CONST
8703 && GET_CODE (XEXP (value, 0)) == UNSPEC
8704 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8705 && XVECLEN (XEXP (value, 0), 0) == 1)
8706 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8708 insn = emit_label_after (c->label, insn);
8709 INSN_ADDRESSES_NEW (insn, -1);
8711 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8712 gen_rtvec (1, value),
8713 UNSPECV_POOL_ENTRY);
8714 insn = emit_insn_after (value, insn);
8715 INSN_ADDRESSES_NEW (insn, -1);
8718 /* Ensure minimum alignment for instructions. */
8719 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8720 INSN_ADDRESSES_NEW (insn, -1);
8722 /* Output in-pool execute template insns. */
8723 for (c = pool->execute; c; c = c->next)
8725 insn = emit_label_after (c->label, insn);
8726 INSN_ADDRESSES_NEW (insn, -1);
8728 insn = emit_insn_after (s390_execute_target (c->value), insn);
8729 INSN_ADDRESSES_NEW (insn, -1);
8732 /* Switch back to previous section. */
8733 if (TARGET_CPU_ZARCH)
8735 insn = emit_insn_after (gen_pool_section_end (), insn);
8736 INSN_ADDRESSES_NEW (insn, -1);
8739 insn = emit_barrier_after (insn);
8740 INSN_ADDRESSES_NEW (insn, -1);
8742 /* Remove placeholder insn. */
8743 remove_insn (pool->pool_insn);
8746 /* Free all memory used by POOL. */
8749 s390_free_pool (struct constant_pool *pool)
8751 struct constant *c, *next;
8754 for (i = 0; i < NR_C_MODES; i++)
8755 for (c = pool->constants[i]; c; c = next)
8761 for (c = pool->execute; c; c = next)
8767 BITMAP_FREE (pool->insns);
8772 /* Collect main literal pool. Return NULL on overflow. */
8774 static struct constant_pool *
8775 s390_mainpool_start (void)
8777 struct constant_pool *pool;
8780 pool = s390_alloc_pool ();
8782 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8784 if (NONJUMP_INSN_P (insn)
8785 && GET_CODE (PATTERN (insn)) == SET
8786 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8787 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8789 /* There might be two main_pool instructions if base_reg
8790 is call-clobbered; one for shrink-wrapped code and one
8791 for the rest. We want to keep the first. */
8792 if (pool->pool_insn)
8794 insn = PREV_INSN (insn);
8795 delete_insn (NEXT_INSN (insn));
8798 pool->pool_insn = insn;
8801 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8803 s390_add_execute (pool, insn);
8805 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8807 rtx pool_ref = NULL_RTX;
8808 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8811 rtx constant = get_pool_constant (pool_ref);
8812 machine_mode mode = get_pool_mode (pool_ref);
8813 s390_add_constant (pool, constant, mode);
8817 /* If hot/cold partitioning is enabled we have to make sure that
8818 the literal pool is emitted in the same section where the
8819 initialization of the literal pool base pointer takes place.
8820 emit_pool_after is only used in the non-overflow case on non-Z
8821 CPUs where we can emit the literal pool at the end of the
8822 function body within the text section. */
8824 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8825 && !pool->emit_pool_after)
8826 pool->emit_pool_after = PREV_INSN (insn);
8829 gcc_assert (pool->pool_insn || pool->size == 0);
8831 if (pool->size >= 4096)
8833 /* We're going to chunkify the pool, so remove the main
8834 pool placeholder insn. */
8835 remove_insn (pool->pool_insn);
8837 s390_free_pool (pool);
8841 /* If the function ends with the section where the literal pool
8842 should be emitted set the marker to its end. */
8843 if (pool && !pool->emit_pool_after)
8844 pool->emit_pool_after = get_last_insn ();
8849 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8850 Modify the current function to output the pool constants as well as
8851 the pool register setup instruction. */
8854 s390_mainpool_finish (struct constant_pool *pool)
8856 rtx base_reg = cfun->machine->base_reg;
8858 /* If the pool is empty, we're done. */
8859 if (pool->size == 0)
8861 /* We don't actually need a base register after all. */
8862 cfun->machine->base_reg = NULL_RTX;
8864 if (pool->pool_insn)
8865 remove_insn (pool->pool_insn);
8866 s390_free_pool (pool);
8870 /* We need correct insn addresses. */
8871 shorten_branches (get_insns ());
8873 /* On zSeries, we use a LARL to load the pool register. The pool is
8874 located in the .rodata section, so we emit it after the function. */
8875 if (TARGET_CPU_ZARCH)
8877 rtx set = gen_main_base_64 (base_reg, pool->label);
8878 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8879 INSN_ADDRESSES_NEW (insn, -1);
8880 remove_insn (pool->pool_insn);
8882 insn = get_last_insn ();
8883 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8884 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8886 s390_dump_pool (pool, 0);
8889 /* On S/390, if the total size of the function's code plus literal pool
8890 does not exceed 4096 bytes, we use BASR to set up a function base
8891 pointer, and emit the literal pool at the end of the function. */
8892 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8893 + pool->size + 8 /* alignment slop */ < 4096)
8895 rtx set = gen_main_base_31_small (base_reg, pool->label);
8896 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8897 INSN_ADDRESSES_NEW (insn, -1);
8898 remove_insn (pool->pool_insn);
8900 insn = emit_label_after (pool->label, insn);
8901 INSN_ADDRESSES_NEW (insn, -1);
8903 /* emit_pool_after will be set by s390_mainpool_start to the
8904 last insn of the section where the literal pool should be
8906 insn = pool->emit_pool_after;
8908 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8909 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8911 s390_dump_pool (pool, 1);
8914 /* Otherwise, we emit an inline literal pool and use BASR to branch
8915 over it, setting up the pool register at the same time. */
8918 rtx_code_label *pool_end = gen_label_rtx ();
8920 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8921 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8922 JUMP_LABEL (insn) = pool_end;
8923 INSN_ADDRESSES_NEW (insn, -1);
8924 remove_insn (pool->pool_insn);
8926 insn = emit_label_after (pool->label, insn);
8927 INSN_ADDRESSES_NEW (insn, -1);
8929 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8930 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8932 insn = emit_label_after (pool_end, pool->pool_insn);
8933 INSN_ADDRESSES_NEW (insn, -1);
8935 s390_dump_pool (pool, 1);
8939 /* Replace all literal pool references. */
8941 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8944 replace_ltrel_base (&PATTERN (insn));
8946 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8948 rtx addr, pool_ref = NULL_RTX;
8949 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8952 if (s390_execute_label (insn))
8953 addr = s390_find_execute (pool, insn);
8955 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8956 get_pool_mode (pool_ref));
8958 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8959 INSN_CODE (insn) = -1;
8965 /* Free the pool. */
8966 s390_free_pool (pool);
8969 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8970 We have decided we cannot use this pool, so revert all changes
8971 to the current function that were done by s390_mainpool_start. */
8973 s390_mainpool_cancel (struct constant_pool *pool)
8975 /* We didn't actually change the instruction stream, so simply
8976 free the pool memory. */
8977 s390_free_pool (pool);
8981 /* Chunkify the literal pool. */
8983 #define S390_POOL_CHUNK_MIN 0xc00
8984 #define S390_POOL_CHUNK_MAX 0xe00
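/* Worked numbers (illustrative): short-displacement addressing reaches
   12 unsigned bits, i.e. 0..4095 bytes from the pool base.  Ending a
   chunk once it grows past 0xc00 (3072) bytes, and at 0xe00 (3584)
   bytes at the latest, leaves room for the pessimistic reload and
   alignment slop accounted for in s390_chunkify_start below, so a chunk
   never outgrows the 4 KB window.  */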
8986 static struct constant_pool *
8987 s390_chunkify_start (void)
8989 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8992 rtx pending_ltrel = NULL_RTX;
8995 rtx (*gen_reload_base) (rtx, rtx) =
8996 TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;
8999 /* We need correct insn addresses. */
9001 shorten_branches (get_insns ());
9003 /* Scan all insns and move literals to pool chunks. */
9005 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9007 bool section_switch_p = false;
9009 /* Check for pending LTREL_BASE. */
9012 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9015 gcc_assert (ltrel_base == pending_ltrel);
9016 pending_ltrel = NULL_RTX;
9020 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9023 curr_pool = s390_start_pool (&pool_list, insn);
9025 s390_add_execute (curr_pool, insn);
9026 s390_add_pool_insn (curr_pool, insn);
9028 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9030 rtx pool_ref = NULL_RTX;
9031 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9034 rtx constant = get_pool_constant (pool_ref);
9035 machine_mode mode = get_pool_mode (pool_ref);
9038 curr_pool = s390_start_pool (&pool_list, insn);
9040 s390_add_constant (curr_pool, constant, mode);
9041 s390_add_pool_insn (curr_pool, insn);
9043 /* Don't split the pool chunk between a LTREL_OFFSET load
9044 and the corresponding LTREL_BASE. */
9045 if (GET_CODE (constant) == CONST
9046 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9047 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9049 gcc_assert (!pending_ltrel);
9050 pending_ltrel = pool_ref;
9055 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9058 s390_add_pool_insn (curr_pool, insn);
9059 /* An LTREL_BASE must follow within the same basic block. */
9060 gcc_assert (!pending_ltrel);
9064 switch (NOTE_KIND (insn))
9066 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9067 section_switch_p = true;
9069 case NOTE_INSN_VAR_LOCATION:
9070 case NOTE_INSN_CALL_ARG_LOCATION:
9077 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9078 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9081 if (TARGET_CPU_ZARCH)
9083 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9086 s390_end_pool (curr_pool, NULL);
9091 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9092 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9095 /* We will later have to insert base register reload insns.
9096 Those will have an effect on code size, which we need to
9097 consider here. This calculation makes rather pessimistic
9098 worst-case assumptions. */
9102 if (chunk_size < S390_POOL_CHUNK_MIN
9103 && curr_pool->size < S390_POOL_CHUNK_MIN
9104 && !section_switch_p)
9107 /* Pool chunks can only be inserted after BARRIERs ... */
9108 if (BARRIER_P (insn))
9110 s390_end_pool (curr_pool, insn);
9115 /* ... so if we don't find one in time, create one. */
9116 else if (chunk_size > S390_POOL_CHUNK_MAX
9117 || curr_pool->size > S390_POOL_CHUNK_MAX
9118 || section_switch_p)
9120 rtx_insn *label, *jump, *barrier, *next, *prev;
9122 if (!section_switch_p)
9124 /* We can insert the barrier only after a 'real' insn. */
9125 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9127 if (get_attr_length (insn) == 0)
9129 /* Don't separate LTREL_BASE from the corresponding
9130 LTREL_OFFSET load. */
9137 next = NEXT_INSN (insn);
9141 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
9142 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
9146 gcc_assert (!pending_ltrel);
9148 /* The old pool has to end before the section switch
9149 note in order to make it part of the current
section. */
9151 insn = PREV_INSN (insn);
9154 label = gen_label_rtx ();
9156 if (prev && NOTE_P (prev))
9157 prev = prev_nonnote_insn (prev);
9159 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9160 INSN_LOCATION (prev));
9162 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9163 barrier = emit_barrier_after (jump);
9164 insn = emit_label_after (label, barrier);
9165 JUMP_LABEL (jump) = label;
9166 LABEL_NUSES (label) = 1;
9168 INSN_ADDRESSES_NEW (jump, -1);
9169 INSN_ADDRESSES_NEW (barrier, -1);
9170 INSN_ADDRESSES_NEW (insn, -1);
9172 s390_end_pool (curr_pool, barrier);
9180 s390_end_pool (curr_pool, NULL);
9181 gcc_assert (!pending_ltrel);
9183 /* Find all labels that are branched into
9184 from an insn belonging to a different chunk. */
9186 far_labels = BITMAP_ALLOC (NULL);
9188 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9190 rtx_jump_table_data *table;
9192 /* Labels marked with LABEL_PRESERVE_P can be target
9193 of non-local jumps, so we have to mark them.
9194 The same holds for named labels.
9196 Don't do that, however, if it is the label before
a jump table. */
if (LABEL_P (insn)
9200 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9202 rtx_insn *vec_insn = NEXT_INSN (insn);
9203 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9204 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9206 /* Check potential targets in a table jump (casesi_jump). */
9207 else if (tablejump_p (insn, NULL, &table))
9209 rtx vec_pat = PATTERN (table);
9210 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9212 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9214 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9216 if (s390_find_pool (pool_list, label)
9217 != s390_find_pool (pool_list, insn))
9218 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9221 /* If we have a direct jump (conditional or unconditional),
9222 check all potential targets. */
9223 else if (JUMP_P (insn))
9225 rtx pat = PATTERN (insn);
9227 if (GET_CODE (pat) == PARALLEL)
9228 pat = XVECEXP (pat, 0, 0);
9230 if (GET_CODE (pat) == SET)
9232 rtx label = JUMP_LABEL (insn);
9233 if (label && !ANY_RETURN_P (label))
9235 if (s390_find_pool (pool_list, label)
9236 != s390_find_pool (pool_list, insn))
9237 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9243 /* Insert base register reload insns before every pool. */
9245 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9247 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9249 rtx_insn *insn = curr_pool->first_insn;
9250 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9253 /* Insert base register reload insns at every far label. */
9255 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9257 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9259 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9262 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9264 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9269 BITMAP_FREE (far_labels);
9272 /* Recompute insn addresses. */
9274 init_insn_lengths ();
9275 shorten_branches (get_insns ());
9280 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9281 After we have decided to use this list, finish implementing
9282 all changes to the current function as required. */
9285 s390_chunkify_finish (struct constant_pool *pool_list)
9287 struct constant_pool *curr_pool = NULL;
9291 /* Replace all literal pool references. */
9293 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9296 replace_ltrel_base (&PATTERN (insn));
9298 curr_pool = s390_find_pool (pool_list, insn);
9302 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9304 rtx addr, pool_ref = NULL_RTX;
9305 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9308 if (s390_execute_label (insn))
9309 addr = s390_find_execute (curr_pool, insn);
9311 addr = s390_find_constant (curr_pool,
9312 get_pool_constant (pool_ref),
9313 get_pool_mode (pool_ref));
9315 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9316 INSN_CODE (insn) = -1;
9321 /* Dump out all literal pools. */
9323 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9324 s390_dump_pool (curr_pool, 0);
9326 /* Free pool list. */
9330 struct constant_pool *next = pool_list->next;
9331 s390_free_pool (pool_list);
9336 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9337 We have decided we cannot use this list, so revert all changes
9338 to the current function that were done by s390_chunkify_start. */
9341 s390_chunkify_cancel (struct constant_pool *pool_list)
9343 struct constant_pool *curr_pool = NULL;
9346 /* Remove all pool placeholder insns. */
9348 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9350 /* Did we insert an extra barrier? Remove it. */
9351 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9352 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9353 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9355 if (jump && JUMP_P (jump)
9356 && barrier && BARRIER_P (barrier)
9357 && label && LABEL_P (label)
9358 && GET_CODE (PATTERN (jump)) == SET
9359 && SET_DEST (PATTERN (jump)) == pc_rtx
9360 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9361 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9364 remove_insn (barrier);
9365 remove_insn (label);
9368 remove_insn (curr_pool->pool_insn);
9371 /* Remove all base register reload insns. */
9373 for (insn = get_insns (); insn; )
9375 rtx_insn *next_insn = NEXT_INSN (insn);
9377 if (NONJUMP_INSN_P (insn)
9378 && GET_CODE (PATTERN (insn)) == SET
9379 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9380 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9386 /* Free pool list. */
9390 struct constant_pool *next = pool_list->next;
9391 s390_free_pool (pool_list);
9396 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9399 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9401 switch (GET_MODE_CLASS (mode))
9404 case MODE_DECIMAL_FLOAT:
9405 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9407 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9411 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9412 mark_symbol_refs_as_used (exp);
9415 case MODE_VECTOR_INT:
9416 case MODE_VECTOR_FLOAT:
9419 machine_mode inner_mode;
9420 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9422 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9423 for (i = 0; i < XVECLEN (exp, 0); i++)
9424 s390_output_pool_entry (XVECEXP (exp, 0, i),
9428 : GET_MODE_BITSIZE (inner_mode));
9438 /* Return an RTL expression representing the value of the return address
9439 for the frame COUNT steps up from the current frame. FRAME is the
9440 frame pointer of that frame. */
9443 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9448 /* Without backchain, we fail for all but the current frame. */
9450 if (!TARGET_BACKCHAIN && count > 0)
9453 /* For the current frame, we need to make sure the initial
9454 value of RETURN_REGNUM is actually saved. */
9458 /* On non-z architectures branch splitting could overwrite r14. */
9459 if (TARGET_CPU_ZARCH)
9460 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9463 cfun_frame_layout.save_return_addr_p = true;
9464 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9468 if (TARGET_PACKED_STACK)
9469 offset = -2 * UNITS_PER_LONG;
9471 offset = RETURN_REGNUM * UNITS_PER_LONG;
9473 addr = plus_constant (Pmode, frame, offset);
9474 addr = memory_address (Pmode, addr);
9475 return gen_rtx_MEM (Pmode, addr);
9478 /* Return an RTL expression representing the back chain stored in
9479 the current stack frame. */
9482 s390_back_chain_rtx (void)
9486 gcc_assert (TARGET_BACKCHAIN);
9488 if (TARGET_PACKED_STACK)
9489 chain = plus_constant (Pmode, stack_pointer_rtx,
9490 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9492 chain = stack_pointer_rtx;
9494 chain = gen_rtx_MEM (Pmode, chain);
9498 /* Find the first call-clobbered register unused in a function.
9499 This could be used as a base register in a leaf function
9500 or for holding the return address before the epilogue.  */
9503 find_unused_clobbered_reg (void)
9506 for (i = 0; i < 6; i++)
9507 if (!df_regs_ever_live_p (i))
9513 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9514 clobbered hard regs in SETREG. */
9517 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9519 char *regs_ever_clobbered = (char *)data;
9520 unsigned int i, regno;
9521 machine_mode mode = GET_MODE (setreg);
9523 if (GET_CODE (setreg) == SUBREG)
9525 rtx inner = SUBREG_REG (setreg);
9526 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9528 regno = subreg_regno (setreg);
9530 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9531 regno = REGNO (setreg);
9536 i < regno + HARD_REGNO_NREGS (regno, mode);
9538 regs_ever_clobbered[i] = 1;
9541 /* Walks through all basic blocks of the current function looking
9542 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9543 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9544 each of those regs. */
9547 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9553 memset (regs_ever_clobbered, 0, 32);
9555 /* For non-leaf functions we have to consider all call clobbered regs to be
9556 clobbered.  */
9557 if (!crtl->is_leaf)
9558 {
9559 for (i = 0; i < 32; i++)
9560 regs_ever_clobbered[i] = call_really_used_regs[i];
9561 }
9563 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9564 this work is done by liveness analysis (mark_regs_live_at_end).
9565 Special care is needed for functions containing landing pads. Landing pads
9566 may use the eh registers, but the code which sets these registers is not
9567 contained in that function. Hence s390_regs_ever_clobbered is not able to
9568 deal with this automatically. */
9569 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9570 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9571 if (crtl->calls_eh_return
9572 || (cfun->machine->has_landing_pad_p
9573 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9574 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9576 /* For nonlocal gotos all call-saved registers have to be saved.
9577 This flag is also set for the unwinding code in libgcc.
9578 See expand_builtin_unwind_init. For regs_ever_live this is done by
9579 reload.  */
9580 if (crtl->saves_all_registers)
9581 for (i = 0; i < 32; i++)
9582 if (!call_really_used_regs[i])
9583 regs_ever_clobbered[i] = 1;
9585 FOR_EACH_BB_FN (cur_bb, cfun)
9587 FOR_BB_INSNS (cur_bb, cur_insn)
9591 if (!INSN_P (cur_insn))
9594 pat = PATTERN (cur_insn);
9596 /* Ignore GPR restore insns. */
9597 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9599 if (GET_CODE (pat) == SET
9600 && GENERAL_REG_P (SET_DEST (pat)))
9603 if (GET_MODE (SET_SRC (pat)) == DImode
9604 && FP_REG_P (SET_SRC (pat)))
9608 if (GET_CODE (SET_SRC (pat)) == MEM)
9613 if (GET_CODE (pat) == PARALLEL
9614 && load_multiple_operation (pat, VOIDmode))
9619 s390_reg_clobbered_rtx,
9620 regs_ever_clobbered);
9625 /* Determine the frame area which actually has to be accessed
9626 in the function epilogue. The values are stored at the
9627 given pointers AREA_BOTTOM (address of the lowest used stack
9628 address) and AREA_TOP (address of the first item which does
9629 not belong to the stack frame). */
9632 s390_frame_area (int *area_bottom, int *area_top)
9639 if (cfun_frame_layout.first_restore_gpr != -1)
9641 b = (cfun_frame_layout.gprs_offset
9642 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9643 t = b + (cfun_frame_layout.last_restore_gpr
9644 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9647 if (TARGET_64BIT && cfun_save_high_fprs_p)
9649 b = MIN (b, cfun_frame_layout.f8_offset);
9650 t = MAX (t, (cfun_frame_layout.f8_offset
9651 + cfun_frame_layout.high_fprs * 8));
9656 if (cfun_fpr_save_p (FPR4_REGNUM))
9658 b = MIN (b, cfun_frame_layout.f4_offset);
9659 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9661 if (cfun_fpr_save_p (FPR6_REGNUM))
9663 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9664 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9670 /* Update gpr_save_slots in the frame layout trying to make use of
9671 FPRs as GPR save slots.
9672 This is a helper routine of s390_register_info. */
9675 s390_register_info_gprtofpr ()
9677 int save_reg_slot = FPR0_REGNUM;
9680 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9683 /* builtin_eh_return needs to be able to modify the return address
9684 on the stack. It could also adjust the FPR save slot instead but
9685 is it worth the trouble?! */
9686 if (crtl->calls_eh_return)
9689 for (i = 15; i >= 6; i--)
9691 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9694 /* Advance to the next FP register which can be used as a
9695 GPR save slot.  */
9696 while ((!call_really_used_regs[save_reg_slot]
9697 || df_regs_ever_live_p (save_reg_slot)
9698 || cfun_fpr_save_p (save_reg_slot))
9699 && FP_REGNO_P (save_reg_slot))
9701 if (!FP_REGNO_P (save_reg_slot))
9703 /* We only want to use ldgr/lgdr if we can get rid of
9704 stm/lm entirely. So undo the gpr slot allocation in
9705 case we ran out of FPR save slots. */
9706 for (j = 6; j <= 15; j++)
9707 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9708 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9711 cfun_gpr_save_slot (i) = save_reg_slot++;
9715 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9716 stdarg.
9717 This is a helper routine for s390_register_info. */
9720 s390_register_info_stdarg_fpr ()
9726 /* Save the FP argument regs for stdarg. f0 and f2 for 31 bit;
9727 f0, f2, f4 and f6 for 64 bit.  */
9729 || !TARGET_HARD_FLOAT
9730 || !cfun->va_list_fpr_size
9731 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9734 min_fpr = crtl->args.info.fprs;
9735 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9736 if (max_fpr >= FP_ARG_NUM_REG)
9737 max_fpr = FP_ARG_NUM_REG - 1;
9739 /* FPR argument regs start at f0. */
9740 min_fpr += FPR0_REGNUM;
9741 max_fpr += FPR0_REGNUM;
9743 for (i = min_fpr; i <= max_fpr; i++)
9744 cfun_set_fpr_save (i);
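/* Worked example (64-bit ABI, FP_ARG_NUM_REG == 4): if two FPR
   arguments are named, crtl->args.info.fprs is 2, so - assuming
   cfun->va_list_fpr_size covers the rest - min_fpr/max_fpr select
   the remaining argument FPRs f4 and f6 as the ones whose save
   slots must be populated for va_arg.  */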
9747 /* Reserve the GPR save slots for GPRs which need to be saved due to
9748 stdarg.
9749 This is a helper routine for s390_register_info. */
9752 s390_register_info_stdarg_gpr ()
9759 || !cfun->va_list_gpr_size
9760 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9763 min_gpr = crtl->args.info.gprs;
9764 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9765 if (max_gpr >= GP_ARG_NUM_REG)
9766 max_gpr = GP_ARG_NUM_REG - 1;
9768 /* GPR argument regs start at r2. */
9769 min_gpr += GPR2_REGNUM;
9770 max_gpr += GPR2_REGNUM;
9772 /* If r6 was supposed to be saved into an FPR and now needs to go to
9773 the stack for varargs, we have to adjust the restore range to make
9774 sure that the restore is done from the stack as well.  */
9775 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9776 && min_gpr <= GPR6_REGNUM
9777 && max_gpr >= GPR6_REGNUM)
9779 if (cfun_frame_layout.first_restore_gpr == -1
9780 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9781 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9782 if (cfun_frame_layout.last_restore_gpr == -1
9783 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9784 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9787 if (cfun_frame_layout.first_save_gpr == -1
9788 || cfun_frame_layout.first_save_gpr > min_gpr)
9789 cfun_frame_layout.first_save_gpr = min_gpr;
9791 if (cfun_frame_layout.last_save_gpr == -1
9792 || cfun_frame_layout.last_save_gpr < max_gpr)
9793 cfun_frame_layout.last_save_gpr = max_gpr;
9795 for (i = min_gpr; i <= max_gpr; i++)
9796 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
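/* Worked example (GP_ARG_NUM_REG == 5, i.e. r2-r6): with three named
   GPR arguments crtl->args.info.gprs is 3, so r5 and r6 are forced
   into stack save slots; since r6 is also call-saved, the restore
   range is widened above to include it.  */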
9799 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9800 prologue and epilogue. */
9803 s390_register_info_set_ranges ()
9807 /* Find the first and the last save slot supposed to use the stack
9808 to set the restore range.
9809 Vararg regs might be marked as saved to the stack, but only the
9810 call-saved regs really need restoring (i.e. r6). This code
9811 assumes that the vararg regs have not yet been recorded in
9812 cfun_gpr_save_slot. */
9813 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9814 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9815 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9816 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9817 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9818 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
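/* E.g. if only r11 and r15 have stack save slots, i ends up as 11 and
   j as 15, so a single stm(g)/lm(g) range covering r11-r15 is used;
   i == 16 means no GPR needs the stack at all and all four ranges
   stay -1.  */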
9821 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9822 for registers which need to be saved in function prologue.
9823 This function can be used until the insns emitted for save/restore
9824 of the regs are visible in the RTL stream. */
9827 s390_register_info ()
9830 char clobbered_regs[32];
9832 gcc_assert (!epilogue_completed);
9834 if (reload_completed)
9835 /* After reload we rely on our own routine to determine which
9836 registers need saving. */
9837 s390_regs_ever_clobbered (clobbered_regs);
9839 /* During reload we use regs_ever_live as a base since reload
9840 makes changes there which we otherwise would not be aware
9841 of.  */
9842 for (i = 0; i < 32; i++)
9843 clobbered_regs[i] = df_regs_ever_live_p (i);
9845 for (i = 0; i < 32; i++)
9846 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9848 /* Mark the call-saved FPRs which need to be saved.
9849 This needs to be done before checking the special GPRs since the
9850 stack pointer usage depends on whether high FPRs have to be saved
9851 or not.  */
9852 cfun_frame_layout.fpr_bitmap = 0;
9853 cfun_frame_layout.high_fprs = 0;
9854 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9855 if (clobbered_regs[i] && !call_really_used_regs[i])
9857 cfun_set_fpr_save (i);
9858 if (i >= FPR8_REGNUM)
9859 cfun_frame_layout.high_fprs++;
9862 /* Register 12 is used for the GOT address, but also as a temp in the
9863 prologue for split-stack stdarg functions (unless r14 is available). */
9864 clobbered_regs[12]
9865 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9866 || (flag_split_stack && cfun->stdarg
9867 && (crtl->is_leaf || TARGET_TPF_PROFILING
9868 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9870 clobbered_regs[BASE_REGNUM]
9871 |= (cfun->machine->base_reg
9872 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9874 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9875 |= !!frame_pointer_needed;
9877 /* On pre z900 machines this might take until machine dependent
9878 reorg to decide.
9879 save_return_addr_p will only be set on non-zarch machines so
9880 there is no risk that r14 goes into an FPR instead of a stack
9881 slot.  */
9882 clobbered_regs[RETURN_REGNUM]
9883 |= (!crtl->is_leaf
9884 || TARGET_TPF_PROFILING
9885 || cfun->machine->split_branches_pending_p
9886 || cfun_frame_layout.save_return_addr_p
9887 || crtl->calls_eh_return);
9889 clobbered_regs[STACK_POINTER_REGNUM]
9890 |= (!crtl->is_leaf
9891 || TARGET_TPF_PROFILING
9892 || cfun_save_high_fprs_p
9893 || get_frame_size () > 0
9894 || (reload_completed && cfun_frame_layout.frame_size > 0)
9895 || cfun->calls_alloca);
9897 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9899 for (i = 6; i < 16; i++)
9900 if (clobbered_regs[i])
9901 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9903 s390_register_info_stdarg_fpr ();
9904 s390_register_info_gprtofpr ();
9905 s390_register_info_set_ranges ();
9906 /* stdarg functions might need to save GPRs 2 to 6. This might
9907 override the GPR->FPR save decision made by
9908 s390_register_info_gprtofpr for r6 since vararg regs must go to
9909 the stack.  */
9910 s390_register_info_stdarg_gpr ();
9913 /* This function is called by s390_optimize_prologue in order to get
9914 rid of unnecessary GPR save/restore instructions. The register info
9915 for the GPRs is re-computed and the ranges are re-calculated. */
9918 s390_optimize_register_info ()
9920 char clobbered_regs[32];
9923 gcc_assert (epilogue_completed);
9924 gcc_assert (!cfun->machine->split_branches_pending_p);
9926 s390_regs_ever_clobbered (clobbered_regs);
9928 for (i = 0; i < 32; i++)
9929 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9931 /* There is still special treatment needed for cases invisible to
9932 s390_regs_ever_clobbered. */
9933 clobbered_regs[RETURN_REGNUM]
9934 |= (TARGET_TPF_PROFILING
9935 /* When expanding builtin_return_addr in ESA mode we do not
9936 know whether r14 will later be needed as scratch reg when
9937 doing branch splitting. So the builtin always accesses the
9938 r14 save slot and we need to stick to the save/restore
9939 decision for r14 even if it turns out that it didn't get
9940 clobbered.  */
9941 || cfun_frame_layout.save_return_addr_p
9942 || crtl->calls_eh_return);
9944 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9946 for (i = 6; i < 16; i++)
9947 if (!clobbered_regs[i])
9948 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9950 s390_register_info_set_ranges ();
9951 s390_register_info_stdarg_gpr ();
9954 /* Fill cfun->machine with info about the frame of the current function. */
9957 s390_frame_info (void)
9959 HOST_WIDE_INT lowest_offset;
9961 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9962 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9964 /* The va_arg builtin uses a constant distance of 16 *
9965 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9966 pointer. So even if we are going to save the stack pointer in an
9967 FPR we need the stack space in order to keep the offsets
9968 correct.  */
9969 if (cfun->stdarg && cfun_save_arg_fprs_p)
9971 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9973 if (cfun_frame_layout.first_save_gpr_slot == -1)
9974 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9977 cfun_frame_layout.frame_size = get_frame_size ();
9978 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9979 fatal_error (input_location,
9980 "total size of local variables exceeds architecture limit");
9982 if (!TARGET_PACKED_STACK)
9984 /* Fixed stack layout. */
9985 cfun_frame_layout.backchain_offset = 0;
9986 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9987 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9988 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9989 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9992 else if (TARGET_BACKCHAIN)
9994 /* Kernel stack layout - packed stack, backchain, no float */
9995 gcc_assert (TARGET_SOFT_FLOAT);
9996 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9999 /* The distance between the backchain and the return address
10000 save slot must not change. So we always need a slot for the
10001 stack pointer which resides in between. */
10002 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10004 cfun_frame_layout.gprs_offset
10005 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10007 /* FPRs will not be saved. Nevertheless pick sane values to
10008 keep area calculations valid. */
10009 cfun_frame_layout.f0_offset =
10010 cfun_frame_layout.f4_offset =
10011 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10017 /* Packed stack layout without backchain. */
10019 /* With stdarg FPRs need their dedicated slots. */
10020 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10021 : (cfun_fpr_save_p (FPR4_REGNUM) +
10022 cfun_fpr_save_p (FPR6_REGNUM)));
10023 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10025 num_fprs = (cfun->stdarg ? 2
10026 : (cfun_fpr_save_p (FPR0_REGNUM)
10027 + cfun_fpr_save_p (FPR2_REGNUM)));
10028 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
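/* Example (64-bit, STACK_POINTER_OFFSET == 160, stdarg): two slots
   are reserved for f4/f6 giving f4_offset == 144, and two more for
   f0/f2 giving f0_offset == 128; the GPR save area is then placed
   directly below f0_offset.  */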
10030 cfun_frame_layout.gprs_offset
10031 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10033 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10034 - cfun_frame_layout.high_fprs * 8);
10037 if (cfun_save_high_fprs_p)
10038 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10040 if (!crtl->is_leaf)
10041 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10043 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10044 sized area at the bottom of the stack. This is required also for
10045 leaf functions. When GCC generates a local stack reference it
10046 will always add STACK_POINTER_OFFSET to all these references. */
10048 && !TARGET_TPF_PROFILING
10049 && cfun_frame_layout.frame_size == 0
10050 && !cfun->calls_alloca)
10053 /* Calculate the number of bytes we have used in our own register
10054 save area. With the packed stack layout we can re-use the
10055 remaining bytes for normal stack elements. */
10057 if (TARGET_PACKED_STACK)
10058 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10059 cfun_frame_layout.f4_offset),
10060 cfun_frame_layout.gprs_offset);
10064 if (TARGET_BACKCHAIN)
10065 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10067 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10069 /* If, under 31 bit, an odd number of GPRs has to be saved, we have to
10070 adjust the frame size to sustain 8 byte alignment of stack
10071 frames.  */
10072 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10073 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10074 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
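/* With STACK_BOUNDARY == 64 this rounds the frame size up to a
   multiple of 8 bytes, e.g. a raw size of 100 bytes becomes
   (100 + 7) & ~7 == 104.  */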
10077 /* Generate frame layout. Fills in register and frame data for the current
10078 function in cfun->machine. This routine can be called multiple times;
10079 it will re-do the complete frame layout every time. */
10082 s390_init_frame_layout (void)
10084 HOST_WIDE_INT frame_size;
10087 /* After LRA the frame layout is supposed to be read-only and should
10088 not be re-computed. */
10089 if (reload_completed)
10092 /* On S/390 machines, we may need to perform branch splitting, which
10093 will require both base and return address register. We have no
10094 choice but to assume we're going to need them until right at the
10095 end of the machine dependent reorg phase. */
10096 if (!TARGET_CPU_ZARCH)
10097 cfun->machine->split_branches_pending_p = true;
10101 frame_size = cfun_frame_layout.frame_size;
10103 /* Try to predict whether we'll need the base register. */
10104 base_used = cfun->machine->split_branches_pending_p
10105 || crtl->uses_const_pool
10106 || (!DISP_IN_RANGE (frame_size)
10107 && !CONST_OK_FOR_K (frame_size));
10109 /* Decide which register to use as literal pool base. In small
10110 leaf functions, try to use an unused call-clobbered register
10111 as base register to avoid save/restore overhead. */
10113 cfun->machine->base_reg = NULL_RTX;
10119 /* Prefer r5 (most likely to be free). */
10120 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10122 cfun->machine->base_reg =
10123 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
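/* r2-r5 are call-clobbered argument registers; scanning downward
   from r5 prefers the register least likely to carry an incoming
   argument.  If all of r2-r5 are live, fall back to BASE_REGNUM
   (r13).  */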
10126 s390_register_info ();
10127 s390_frame_info ();
10129 while (frame_size != cfun_frame_layout.frame_size);
10132 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10133 the TX is nonescaping. A transaction is considered escaping if
10134 there is at least one path from tbegin returning CC0 to the
10135 function exit block without a tend.
10137 The check so far has some limitations:
10138 - only single tbegin/tend BBs are supported
10139 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10140 - when CC is copied to a GPR and the CC0 check is done with the GPR
10141 this is not supported
10145 s390_optimize_nonescaping_tx (void)
10147 const unsigned int CC0 = 1 << 3;
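/* In CCRAWmode each of the four condition codes is represented by
   one bit of a 4-bit mask, CC0 being the most significant bit.
   tbegin sets CC0 when the transaction has started successfully.  */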
10148 basic_block tbegin_bb = NULL;
10149 basic_block tend_bb = NULL;
10152 bool result = true;
10154 rtx_insn *tbegin_insn = NULL;
10156 if (!cfun->machine->tbegin_p)
10159 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10161 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10166 FOR_BB_INSNS (bb, insn)
10168 rtx ite, cc, pat, target;
10169 unsigned HOST_WIDE_INT mask;
10171 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10174 pat = PATTERN (insn);
10176 if (GET_CODE (pat) == PARALLEL)
10177 pat = XVECEXP (pat, 0, 0);
10179 if (GET_CODE (pat) != SET
10180 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10183 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10187 tbegin_insn = insn;
10189 /* Just return if the tbegin doesn't have clobbers. */
10190 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10193 if (tbegin_bb != NULL)
10196 /* Find the next conditional jump. */
10197 for (tmp = NEXT_INSN (insn);
10199 tmp = NEXT_INSN (tmp))
10201 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10206 ite = SET_SRC (PATTERN (tmp));
10207 if (GET_CODE (ite) != IF_THEN_ELSE)
10210 cc = XEXP (XEXP (ite, 0), 0);
10211 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10212 || GET_MODE (cc) != CCRAWmode
10213 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10216 if (bb->succs->length () != 2)
10219 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10220 if (GET_CODE (XEXP (ite, 0)) == NE)
10224 target = XEXP (ite, 1);
10225 else if (mask == (CC0 ^ 0xf))
10226 target = XEXP (ite, 2);
10234 ei = ei_start (bb->succs);
10235 e1 = ei_safe_edge (ei);
10237 e2 = ei_safe_edge (ei);
10239 if (e2->flags & EDGE_FALLTHRU)
10242 e1 = ei_safe_edge (ei);
10245 if (!(e1->flags & EDGE_FALLTHRU))
10248 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
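/* If the CC0 arm of the IF_THEN_ELSE is the fall-through (pc_rtx),
   the transaction body starts at the fall-through edge e1; otherwise
   it starts at the taken edge e2.  */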
10250 if (tmp == BB_END (bb))
10255 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10257 if (tend_bb != NULL)
10264 /* Either we successfully remove the FPR clobbers here or we are not
10265 able to do anything for this TX. Both cases don't qualify for
10266 retry.  */
10267 cfun->machine->tbegin_p = false;
10269 if (tbegin_bb == NULL || tend_bb == NULL)
10272 calculate_dominance_info (CDI_POST_DOMINATORS);
10273 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10274 free_dominance_info (CDI_POST_DOMINATORS);
10279 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10281 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10282 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10283 INSN_CODE (tbegin_insn) = -1;
10284 df_insn_rescan (tbegin_insn);
10289 /* Return true if it is legal to put a value with MODE into REGNO. */
10292 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10294 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10297 switch (REGNO_REG_CLASS (regno))
10300 return ((GET_MODE_CLASS (mode) == MODE_INT
10301 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10303 || (TARGET_VXE && mode == SFmode)
10304 || s390_vector_mode_supported_p (mode));
10308 && ((GET_MODE_CLASS (mode) == MODE_INT
10309 && s390_class_max_nregs (FP_REGS, mode) == 1)
10311 || s390_vector_mode_supported_p (mode)))
10314 if (REGNO_PAIR_OK (regno, mode))
10316 if (mode == SImode || mode == DImode)
10319 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10324 if (FRAME_REGNO_P (regno) && mode == Pmode)
10329 if (REGNO_PAIR_OK (regno, mode))
10332 || (mode != TFmode && mode != TCmode && mode != TDmode))
10337 if (GET_MODE_CLASS (mode) == MODE_CC)
10341 if (REGNO_PAIR_OK (regno, mode))
10343 if (mode == SImode || mode == Pmode)
10354 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10357 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10359 /* Once we've decided upon a register to use as base register, it must
10360 no longer be used for any other purpose. */
10361 if (cfun->machine->base_reg)
10362 if (REGNO (cfun->machine->base_reg) == old_reg
10363 || REGNO (cfun->machine->base_reg) == new_reg)
10366 /* Prevent regrename from using call-saved regs which haven't
10367 actually been saved. This is necessary since regrename assumes
10368 the backend save/restore decisions are based on
10369 df_regs_ever_live. Since we have our own routine we have to tell
10370 regrename manually about it. */
10371 if (GENERAL_REGNO_P (new_reg)
10372 && !call_really_used_regs[new_reg]
10373 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10379 /* Return nonzero if register REGNO can be used as a scratch register
10380 in peephole2.  */
10383 s390_hard_regno_scratch_ok (unsigned int regno)
10385 /* See s390_hard_regno_rename_ok. */
10386 if (GENERAL_REGNO_P (regno)
10387 && !call_really_used_regs[regno]
10388 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10394 /* Maximum number of registers to represent a value of mode MODE
10395 in a register of class RCLASS. */
10398 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10401 bool reg_pair_required_p = false;
10407 reg_size = TARGET_VX ? 16 : 8;
10409 /* TF and TD modes would fit into a VR but we put them into a
10410 register pair since we do not have 128bit FP instructions on
10411 full VRs.  */
10412 if (TARGET_VX
10413 && SCALAR_FLOAT_MODE_P (mode)
10414 && GET_MODE_SIZE (mode) >= 16)
10415 reg_pair_required_p = true;
10417 /* Even if complex types would fit into a single FPR/VR we force
10418 them into a register pair to deal with the parts more easily.
10419 (FIXME: What about complex ints?) */
10420 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10421 reg_pair_required_p = true;
10427 reg_size = UNITS_PER_WORD;
10431 if (reg_pair_required_p)
10432 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10434 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
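/* Examples with TARGET_VX (reg_size == 16): DFmode needs a single VR
   (8/16 rounds up to 1), while TFmode is forced into a pair and gets
   2 * ((16/2 + 15) / 16) == 2 registers.  For GPRs (reg_size ==
   UNITS_PER_WORD), TImode on 64 bit yields (16 + 7) / 8 == 2.  */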
10437 /* Return TRUE if changing mode from FROM to TO should not be allowed
10438 for register class CLASS. */
10441 s390_cannot_change_mode_class (machine_mode from_mode,
10442 machine_mode to_mode,
10443 enum reg_class rclass)
10445 machine_mode small_mode;
10446 machine_mode big_mode;
10448 /* V1TF and TF have different representations in vector
10449 registers.  */
10450 if (reg_classes_intersect_p (VEC_REGS, rclass)
10451 && ((from_mode == V1TFmode && to_mode == TFmode)
10452 || (from_mode == TFmode && to_mode == V1TFmode)))
10455 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10458 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10460 small_mode = from_mode;
10461 big_mode = to_mode;
10465 small_mode = to_mode;
10466 big_mode = from_mode;
10469 /* Values residing in VRs are little-endian style. All modes are
10470 placed left-aligned in a VR. This means that we cannot allow
10471 switching between modes with differing sizes. Also if the vector
10472 facility is available we still place TFmode values in VR register
10473 pairs, since the only instructions we have operating on TFmodes
10474 only deal with register pairs. Therefore we have to allow DFmode
10475 subregs of TFmodes to enable the TFmode splitters. */
10476 if (reg_classes_intersect_p (VEC_REGS, rclass)
10477 && (GET_MODE_SIZE (small_mode) < 8
10478 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10481 /* Likewise for access registers, since they have only half the
10482 word size on 64-bit. */
10483 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10489 /* Return true if we use LRA instead of the reload pass. */
10493 return s390_lra_flag;
10496 /* Return true if register FROM can be eliminated via register TO. */
10499 s390_can_eliminate (const int from, const int to)
10501 /* On zSeries machines, we have not marked the base register as fixed.
10502 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10503 If a function requires the base register, we say here that this
10504 elimination cannot be performed. This will cause reload to free
10505 up the base register (as if it were fixed). On the other hand,
10506 if the current function does *not* require the base register, we
10507 say here the elimination succeeds, which in turn allows reload
10508 to allocate the base register for any other purpose. */
10509 if (from == BASE_REGNUM && to == BASE_REGNUM)
10511 if (TARGET_CPU_ZARCH)
10513 s390_init_frame_layout ();
10514 return cfun->machine->base_reg == NULL_RTX;
10520 /* Everything else must point into the stack frame. */
10521 gcc_assert (to == STACK_POINTER_REGNUM
10522 || to == HARD_FRAME_POINTER_REGNUM);
10524 gcc_assert (from == FRAME_POINTER_REGNUM
10525 || from == ARG_POINTER_REGNUM
10526 || from == RETURN_ADDRESS_POINTER_REGNUM);
10528 /* Make sure we actually saved the return address. */
10529 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10530 if (!crtl->calls_eh_return
10532 && !cfun_frame_layout.save_return_addr_p)
10538 /* Return the offset between registers FROM and TO right after the prologue. */
10541 s390_initial_elimination_offset (int from, int to)
10543 HOST_WIDE_INT offset;
10545 /* ??? Why are we called for non-eliminable pairs? */
10546 if (!s390_can_eliminate (from, to))
10551 case FRAME_POINTER_REGNUM:
10552 offset = (get_frame_size()
10553 + STACK_POINTER_OFFSET
10554 + crtl->outgoing_args_size);
10557 case ARG_POINTER_REGNUM:
10558 s390_init_frame_layout ();
10559 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10562 case RETURN_ADDRESS_POINTER_REGNUM:
10563 s390_init_frame_layout ();
10565 if (cfun_frame_layout.first_save_gpr_slot == -1)
10567 /* If it turns out that for stdarg nothing went into the reg
10568 save area we also do not need the return address
10569 save slot.  */
10570 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10573 gcc_unreachable ();
10576 /* In order to make the following work it is not necessary for
10577 r14 to have a save slot. It is sufficient if one other GPR
10578 got one. Since the GPRs are always stored without gaps we
10579 are able to calculate where the r14 save slot would
10580 reside.  */
10581 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10582 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10591 gcc_unreachable ();
10597 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10598 to register BASE. Return generated insn. */
10601 save_fpr (rtx base, int offset, int regnum)
10604 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10606 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10607 set_mem_alias_set (addr, get_varargs_alias_set ());
10609 set_mem_alias_set (addr, get_frame_alias_set ());
10611 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10614 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10615 to register BASE. Return generated insn. */
10618 restore_fpr (rtx base, int offset, int regnum)
10621 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10622 set_mem_alias_set (addr, get_frame_alias_set ());
10624 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10627 /* Return true if REGNO is a global register, but not one
10628 of the special ones that need to be saved/restored anyway. */
10631 global_not_special_regno_p (int regno)
10633 return (global_regs[regno]
10634 /* These registers are special and need to be
10635 restored in any case. */
10636 && !(regno == STACK_POINTER_REGNUM
10637 || regno == RETURN_REGNUM
10638 || regno == BASE_REGNUM
10639 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10642 /* Generate insn to save registers FIRST to LAST into
10643 the register save area located at offset OFFSET
10644 relative to register BASE. */
10647 save_gprs (rtx base, int offset, int first, int last)
10649 rtx addr, insn, note;
10652 addr = plus_constant (Pmode, base, offset);
10653 addr = gen_rtx_MEM (Pmode, addr);
10655 set_mem_alias_set (addr, get_frame_alias_set ());
10657 /* Special-case single register. */
10661 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10663 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10665 if (!global_not_special_regno_p (first))
10666 RTX_FRAME_RELATED_P (insn) = 1;
10671 insn = gen_store_multiple (addr,
10672 gen_rtx_REG (Pmode, first),
10673 GEN_INT (last - first + 1));
10675 if (first <= 6 && cfun->stdarg)
10676 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10678 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10680 if (first + i <= 6)
10681 set_mem_alias_set (mem, get_varargs_alias_set ());
10684 /* We need to set the FRAME_RELATED flag on all SETs
10685 inside the store-multiple pattern.
10687 However, we must not emit DWARF records for registers 2..5
10688 if they are stored for use by variable arguments ...
10690 ??? Unfortunately, it is not enough to simply not set the
10691 FRAME_RELATED flags for those SETs, because the first SET
10692 of the PARALLEL is always treated as if it had the flag
10693 set, even if it does not. Therefore we emit a new pattern
10694 without those registers as REG_FRAME_RELATED_EXPR note. */
10696 if (first >= 6 && !global_not_special_regno_p (first))
10698 rtx pat = PATTERN (insn);
10700 for (i = 0; i < XVECLEN (pat, 0); i++)
10701 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10702 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10704 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10706 RTX_FRAME_RELATED_P (insn) = 1;
10708 else if (last >= 6)
10712 for (start = first >= 6 ? first : 6; start <= last; start++)
10713 if (!global_not_special_regno_p (start))
10719 addr = plus_constant (Pmode, base,
10720 offset + (start - first) * UNITS_PER_LONG);
10725 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10726 gen_rtx_REG (Pmode, start));
10728 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10729 gen_rtx_REG (Pmode, start));
10730 note = PATTERN (note);
10732 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10733 RTX_FRAME_RELATED_P (insn) = 1;
10738 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10739 gen_rtx_REG (Pmode, start),
10740 GEN_INT (last - start + 1));
10741 note = PATTERN (note);
10743 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10745 for (i = 0; i < XVECLEN (note, 0); i++)
10746 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10747 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10749 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10751 RTX_FRAME_RELATED_P (insn) = 1;
10757 /* Generate insn to restore registers FIRST to LAST from
10758 the register save area located at offset OFFSET
10759 relative to register BASE. */
10762 restore_gprs (rtx base, int offset, int first, int last)
10766 addr = plus_constant (Pmode, base, offset);
10767 addr = gen_rtx_MEM (Pmode, addr);
10768 set_mem_alias_set (addr, get_frame_alias_set ());
10770 /* Special-case single register. */
10774 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10776 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10778 RTX_FRAME_RELATED_P (insn) = 1;
10782 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10784 GEN_INT (last - first + 1));
10785 RTX_FRAME_RELATED_P (insn) = 1;
10789 /* Return insn sequence to load the GOT register. */
10791 static GTY(()) rtx got_symbol;
10793 s390_load_got (void)
10797 /* We cannot use pic_offset_table_rtx here since we use this
10798 function also for non-pic if __tls_get_offset is called and in
10799 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10800 aren't usable.  */
10801 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10805 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10806 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10811 if (TARGET_CPU_ZARCH)
10813 emit_move_insn (got_rtx, got_symbol);
10819 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10820 UNSPEC_LTREL_OFFSET);
10821 offset = gen_rtx_CONST (Pmode, offset);
10822 offset = force_const_mem (Pmode, offset);
10824 emit_move_insn (got_rtx, offset);
10826 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10827 UNSPEC_LTREL_BASE);
10828 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10830 emit_move_insn (got_rtx, offset);
10833 insns = get_insns ();
10838 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10839 and the change to the stack pointer. */
10842 s390_emit_stack_tie (void)
10844 rtx mem = gen_frame_mem (BLKmode,
10845 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10847 emit_insn (gen_stack_tie (mem));
10850 /* Copy GPRs into FPR save slots. */
10853 s390_save_gprs_to_fprs (void)
10857 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10860 for (i = 6; i < 16; i++)
10862 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10865 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10866 gen_rtx_REG (DImode, i));
10867 RTX_FRAME_RELATED_P (insn) = 1;
10868 /* This prevents dwarf2cfi from interpreting the set. Doing
10869 so it might emit def_cfa_register infos setting an FPR as
10870 new CFA.  */
10871 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10876 /* Restore GPRs from FPR save slots. */
10879 s390_restore_gprs_from_fprs (void)
10883 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10886 for (i = 6; i < 16; i++)
10890 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10893 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10895 if (i == STACK_POINTER_REGNUM)
10896 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10898 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10900 df_set_regs_ever_live (i, true);
10901 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10902 if (i == STACK_POINTER_REGNUM)
10903 add_reg_note (insn, REG_CFA_DEF_CFA,
10904 plus_constant (Pmode, stack_pointer_rtx,
10905 STACK_POINTER_OFFSET));
10906 RTX_FRAME_RELATED_P (insn) = 1;
10911 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10912 generation.  */
10916 const pass_data pass_data_s390_early_mach =
10918 RTL_PASS, /* type */
10919 "early_mach", /* name */
10920 OPTGROUP_NONE, /* optinfo_flags */
10921 TV_MACH_DEP, /* tv_id */
10922 0, /* properties_required */
10923 0, /* properties_provided */
10924 0, /* properties_destroyed */
10925 0, /* todo_flags_start */
10926 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10929 class pass_s390_early_mach : public rtl_opt_pass
10932 pass_s390_early_mach (gcc::context *ctxt)
10933 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10936 /* opt_pass methods: */
10937 virtual unsigned int execute (function *);
10939 }; // class pass_s390_early_mach
10942 pass_s390_early_mach::execute (function *fun)
10946 /* Try to get rid of the FPR clobbers. */
10947 s390_optimize_nonescaping_tx ();
10949 /* Re-compute register info. */
10950 s390_register_info ();
10952 /* If we're using a base register, ensure that it is always valid for
10953 the first non-prologue instruction. */
10954 if (fun->machine->base_reg)
10955 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10957 /* Annotate all constant pool references to let the scheduler know
10958 they implicitly use the base register. */
10959 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10962 annotate_constant_pool_refs (&PATTERN (insn));
10963 df_insn_rescan (insn);
10968 } // anon namespace
10970 /* Expand the prologue into a bunch of separate insns. */
10973 s390_emit_prologue (void)
10981 /* Choose the best register to use as a temp within the prologue.
10982 TPF with profiling must avoid the register 14 - the tracing function
10983 needs the original contents of r14 to be preserved. */
10985 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10987 && !TARGET_TPF_PROFILING)
10988 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10989 else if (flag_split_stack && cfun->stdarg)
10990 temp_reg = gen_rtx_REG (Pmode, 12);
10992 temp_reg = gen_rtx_REG (Pmode, 1);
10994 s390_save_gprs_to_fprs ();
10996 /* Save call saved gprs. */
10997 if (cfun_frame_layout.first_save_gpr != -1)
10999 insn = save_gprs (stack_pointer_rtx,
11000 cfun_frame_layout.gprs_offset +
11001 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11002 - cfun_frame_layout.first_save_gpr_slot),
11003 cfun_frame_layout.first_save_gpr,
11004 cfun_frame_layout.last_save_gpr);
11008 /* Dummy insn to mark literal pool slot. */
11010 if (cfun->machine->base_reg)
11011 emit_insn (gen_main_pool (cfun->machine->base_reg));
11013 offset = cfun_frame_layout.f0_offset;
11015 /* Save f0 and f2. */
11016 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11018 if (cfun_fpr_save_p (i))
11020 save_fpr (stack_pointer_rtx, offset, i);
11023 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11027 /* Save f4 and f6. */
11028 offset = cfun_frame_layout.f4_offset;
11029 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11031 if (cfun_fpr_save_p (i))
11033 insn = save_fpr (stack_pointer_rtx, offset, i);
11036 /* If f4 and f6 are call clobbered they are saved due to
11037 stdarg and therefore are not frame related. */
11038 if (!call_really_used_regs[i])
11039 RTX_FRAME_RELATED_P (insn) = 1;
11041 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11045 if (TARGET_PACKED_STACK
11046 && cfun_save_high_fprs_p
11047 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11049 offset = (cfun_frame_layout.f8_offset
11050 + (cfun_frame_layout.high_fprs - 1) * 8);
11052 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11053 if (cfun_fpr_save_p (i))
11055 insn = save_fpr (stack_pointer_rtx, offset, i);
11057 RTX_FRAME_RELATED_P (insn) = 1;
11060 if (offset >= cfun_frame_layout.f8_offset)
11064 if (!TARGET_PACKED_STACK)
11065 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11067 if (flag_stack_usage_info)
11068 current_function_static_stack_size = cfun_frame_layout.frame_size;
11070 /* Decrement stack pointer. */
11072 if (cfun_frame_layout.frame_size > 0)
11074 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11075 rtx real_frame_off;
11077 if (s390_stack_size)
11079 HOST_WIDE_INT stack_guard;
11081 if (s390_stack_guard)
11082 stack_guard = s390_stack_guard;
11085 /* If no value for stack guard is provided the smallest power of 2
11086 larger than the current frame size is chosen. */
11088 while (stack_guard < cfun_frame_layout.frame_size)
11092 if (cfun_frame_layout.frame_size >= s390_stack_size)
11094 warning (0, "frame size of function %qs is %wd"
11095 " bytes exceeding user provided stack limit of "
11097 "An unconditional trap is added.",
11098 current_function_name(), cfun_frame_layout.frame_size,
11100 emit_insn (gen_trap ());
11105 /* stack_guard has to be smaller than s390_stack_size.
11106 Otherwise we would emit an AND with zero which would
11107 not match the test under mask pattern. */
11108 if (stack_guard >= s390_stack_size)
11110 warning (0, "frame size of function %qs is %wd"
11111 " bytes which is more than half the stack size. "
11112 "The dynamic check would not be reliable. "
11113 "No check emitted for this function.",
11114 current_function_name(),
11115 cfun_frame_layout.frame_size);
11119 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11120 & ~(stack_guard - 1));
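/* E.g. for s390_stack_size == 64K and stack_guard == 4K the mask is
   (65536 - 1) & ~4095 == 0xf000: if all masked bits of the stack
   pointer are zero, fewer than stack_guard bytes remain before the
   limit is crossed, and the conditional trap below fires.  */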
11122 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11123 GEN_INT (stack_check_mask));
11125 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11127 t, const0_rtx, const0_rtx));
11129 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11131 t, const0_rtx, const0_rtx));
11136 if (s390_warn_framesize > 0
11137 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11138 warning (0, "frame size of %qs is %wd bytes",
11139 current_function_name (), cfun_frame_layout.frame_size);
11141 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11142 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11144 /* Save incoming stack pointer into temp reg. */
11145 if (TARGET_BACKCHAIN || next_fpr)
11146 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
11148 /* Subtract frame size from stack pointer. */
11150 if (DISP_IN_RANGE (INTVAL (frame_off)))
11152 insn = gen_rtx_SET (stack_pointer_rtx,
11153 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11155 insn = emit_insn (insn);
11159 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11160 frame_off = force_const_mem (Pmode, frame_off);
11162 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
11163 annotate_constant_pool_refs (&PATTERN (insn));
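/* Frame sizes which fit neither the displacement range nor a signed
   16-bit immediate (constraint K) are loaded from the literal pool,
   hence the constant pool annotation on the add above.  */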
11166 RTX_FRAME_RELATED_P (insn) = 1;
11167 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11168 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11169 gen_rtx_SET (stack_pointer_rtx,
11170 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11173 /* Set backchain. */
11175 if (TARGET_BACKCHAIN)
11177 if (cfun_frame_layout.backchain_offset)
11178 addr = gen_rtx_MEM (Pmode,
11179 plus_constant (Pmode, stack_pointer_rtx,
11180 cfun_frame_layout.backchain_offset));
11182 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11183 set_mem_alias_set (addr, get_frame_alias_set ());
11184 insn = emit_insn (gen_move_insn (addr, temp_reg));
11187 /* If we support non-call exceptions (e.g. for Java),
11188 we need to make sure the backchain pointer is set up
11189 before any possibly trapping memory access. */
11190 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11192 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11193 emit_clobber (addr);
11197 /* Save fprs 8 - 15 (64 bit ABI). */
11199 if (cfun_save_high_fprs_p && next_fpr)
11201 /* If the stack might be accessed through a different register
11202 we have to make sure that the stack pointer decrement is not
11203 moved below the use of the stack slots. */
11204 s390_emit_stack_tie ();
11206 insn = emit_insn (gen_add2_insn (temp_reg,
11207 GEN_INT (cfun_frame_layout.f8_offset)));
11211 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11212 if (cfun_fpr_save_p (i))
11214 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11215 cfun_frame_layout.frame_size
11216 + cfun_frame_layout.f8_offset
11219 insn = save_fpr (temp_reg, offset, i);
11221 RTX_FRAME_RELATED_P (insn) = 1;
11222 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11223 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11224 gen_rtx_REG (DFmode, i)));
11228 /* Set frame pointer, if needed. */
11230 if (frame_pointer_needed)
11232 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11233 RTX_FRAME_RELATED_P (insn) = 1;
11236 /* Set up the GOT pointer, if needed. */
11238 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11240 rtx_insn *insns = s390_load_got ();
11242 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11243 annotate_constant_pool_refs (&PATTERN (insn));
11248 if (TARGET_TPF_PROFILING)
11250 /* Generate a BAS instruction to serve as a function
11251 entry intercept to facilitate the use of tracing
11252 algorithms located at the branch target. */
11253 emit_insn (gen_prologue_tpf ());
11255 /* Emit a blockage here so that all code
11256 lies between the profiling mechanisms. */
11257 emit_insn (gen_blockage ());
11261 /* Expand the epilogue into a bunch of separate insns. */
11264 s390_emit_epilogue (bool sibcall)
11266 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11267 int area_bottom, area_top, offset = 0;
11272 if (TARGET_TPF_PROFILING)
11275 /* Generate a BAS instruction to serve as a function
11276 entry intercept to facilitate the use of tracing
11277 algorithms located at the branch target. */
11279 /* Emit a blockage here so that all code
11280 lies between the profiling mechanisms. */
11281 emit_insn (gen_blockage ());
11283 emit_insn (gen_epilogue_tpf ());
11286 /* Check whether to use frame or stack pointer for restore. */
11288 frame_pointer = (frame_pointer_needed
11289 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11291 s390_frame_area (&area_bottom, &area_top);
11293 /* Check whether we can access the register save area.
11294 If not, increment the frame pointer as required. */
11296 if (area_top <= area_bottom)
11298 /* Nothing to restore. */
11300 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11301 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11303 /* Area is in range. */
11304 offset = cfun_frame_layout.frame_size;
11308 rtx insn, frame_off, cfa;
11310 offset = area_bottom < 0 ? -area_bottom : 0;
11311 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11313 cfa = gen_rtx_SET (frame_pointer,
11314 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11315 if (DISP_IN_RANGE (INTVAL (frame_off)))
11317 insn = gen_rtx_SET (frame_pointer,
11318 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11319 insn = emit_insn (insn);
11323 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11324 frame_off = force_const_mem (Pmode, frame_off);
11326 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11327 annotate_constant_pool_refs (&PATTERN (insn));
11329 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11330 RTX_FRAME_RELATED_P (insn) = 1;
11333 /* Restore call saved fprs. */
11337 if (cfun_save_high_fprs_p)
11339 next_offset = cfun_frame_layout.f8_offset;
11340 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11342 if (cfun_fpr_save_p (i))
11344 restore_fpr (frame_pointer,
11345 offset + next_offset, i);
11347 = alloc_reg_note (REG_CFA_RESTORE,
11348 gen_rtx_REG (DFmode, i), cfa_restores);
11357 next_offset = cfun_frame_layout.f4_offset;
11359 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11361 if (cfun_fpr_save_p (i))
11363 restore_fpr (frame_pointer,
11364 offset + next_offset, i);
11366 = alloc_reg_note (REG_CFA_RESTORE,
11367 gen_rtx_REG (DFmode, i), cfa_restores);
11370 else if (!TARGET_PACKED_STACK)
11376 /* Return register. */
11378 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11380 /* Restore call saved gprs. */
11382 if (cfun_frame_layout.first_restore_gpr != -1)
11387 /* Check for global registers and save them
11388 to the stack location from which they get restored. */
11390 for (i = cfun_frame_layout.first_restore_gpr;
11391 i <= cfun_frame_layout.last_restore_gpr;
11394 if (global_not_special_regno_p (i))
11396 addr = plus_constant (Pmode, frame_pointer,
11397 offset + cfun_frame_layout.gprs_offset
11398 + (i - cfun_frame_layout.first_save_gpr_slot)
11400 addr = gen_rtx_MEM (Pmode, addr);
11401 set_mem_alias_set (addr, get_frame_alias_set ());
11402 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11406 = alloc_reg_note (REG_CFA_RESTORE,
11407 gen_rtx_REG (Pmode, i), cfa_restores);
11412 /* Fetch return address from stack before load multiple,
11413 this is good for scheduling.
11415 Only do this if we already decided that r14 needs to be
11416 saved to a stack slot. (And not just because r14 happens to
11417 be in between two GPRs which need saving.) Otherwise it
11418 would be difficult to take that decision back in
11419 s390_optimize_prologue. */
11420 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
11422 int return_regnum = find_unused_clobbered_reg();
11423 if (!return_regnum)
11425 return_reg = gen_rtx_REG (Pmode, return_regnum);
11427 addr = plus_constant (Pmode, frame_pointer,
11428 offset + cfun_frame_layout.gprs_offset
11430 - cfun_frame_layout.first_save_gpr_slot)
11432 addr = gen_rtx_MEM (Pmode, addr);
11433 set_mem_alias_set (addr, get_frame_alias_set ());
11434 emit_move_insn (return_reg, addr);
11436 /* Once we have done that optimization we have to make sure
11437 s390_optimize_prologue does not try to remove the
11438 store of r14 since we will not be able to find the
11439 load issued here. */
11440 cfun_frame_layout.save_return_addr_p = true;
11444 insn = restore_gprs (frame_pointer,
11445 offset + cfun_frame_layout.gprs_offset
11446 + (cfun_frame_layout.first_restore_gpr
11447 - cfun_frame_layout.first_save_gpr_slot)
11449 cfun_frame_layout.first_restore_gpr,
11450 cfun_frame_layout.last_restore_gpr);
11451 insn = emit_insn (insn);
11452 REG_NOTES (insn) = cfa_restores;
11453 add_reg_note (insn, REG_CFA_DEF_CFA,
11454 plus_constant (Pmode, stack_pointer_rtx,
11455 STACK_POINTER_OFFSET));
11456 RTX_FRAME_RELATED_P (insn) = 1;
11459 s390_restore_gprs_from_fprs ();
11464 /* Return to caller. */
11466 p = rtvec_alloc (2);
11468 RTVEC_ELT (p, 0) = ret_rtx;
11469 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11470 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11474 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11477 s300_set_up_by_prologue (hard_reg_set_container *regs)
11479 if (cfun->machine->base_reg
11480 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11481 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11484 /* -fsplit-stack support. */
11486 /* A SYMBOL_REF for __morestack. */
11487 static GTY(()) rtx morestack_ref;
11489 /* When using -fsplit-stack, the allocation routines set a field in
11490 the TCB to the bottom of the stack plus this much space, measured
11491 in bytes.  */
11493 #define SPLIT_STACK_AVAILABLE 1024
11495 /* Emit -fsplit-stack prologue, which goes before the regular function
11496 prologue.  */
11499 s390_expand_split_stack_prologue (void)
11501 rtx r1, guard, cc = NULL;
11503 /* Offset from thread pointer to __private_ss. */
11504 int psso = TARGET_64BIT ? 0x38 : 0x20;
11505 /* Pointer size in bytes. */
11506 /* Frame size and argument size - the two parameters to __morestack. */
11507 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11508 /* Align argument size to 8 bytes - simplifies __morestack code. */
11509 HOST_WIDE_INT args_size = crtl->args.size >= 0
11510 ? ((crtl->args.size + 7) & ~7)
11512 /* Label to be called by __morestack. */
11513 rtx_code_label *call_done = NULL;
11514 rtx_code_label *parm_base = NULL;
11517 gcc_assert (flag_split_stack && reload_completed);
11518 if (!TARGET_CPU_ZARCH)
11520 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11524 r1 = gen_rtx_REG (Pmode, 1);
11526 /* If no stack frame will be allocated, don't do anything. */
11529 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11531 /* If va_start is used, just use r15. */
11532 emit_move_insn (r1,
11533 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11534 GEN_INT (STACK_POINTER_OFFSET)));
11540 if (morestack_ref == NULL_RTX)
11542 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11543 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11544 | SYMBOL_FLAG_FUNCTION);
11547 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11549 /* If frame_size will fit in an add instruction, do a stack space
11550 check, and only call __morestack if there's not enough space. */
11552 /* Get thread pointer. r1 is the only register we can always destroy - r0
11553 could contain a static chain (and cannot be used to address memory
11554 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11555 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11556 /* Aim at __private_ss. */
11557 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11559 /* If less than 1kiB used, skip addition and compare directly with
11561 if (frame_size > SPLIT_STACK_AVAILABLE)
11563 emit_move_insn (r1, guard);
11565 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11567 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11571 /* Compare the (maybe adjusted) guard with the stack pointer. */
11572 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11575 call_done = gen_label_rtx ();
11576 parm_base = gen_label_rtx ();
11578 /* Emit the parameter block. */
11579 tmp = gen_split_stack_data (parm_base, call_done,
11580 GEN_INT (frame_size),
11581 GEN_INT (args_size));
11582 insn = emit_insn (tmp);
11583 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11584 LABEL_NUSES (call_done)++;
11585 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11586 LABEL_NUSES (parm_base)++;
11588 /* %r1 = litbase. */
11589 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11590 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11591 LABEL_NUSES (parm_base)++;
11593 /* Now, we need to call __morestack. It has very special calling
11594 conventions: it preserves param/return/static chain registers for
11595 calling main function body, and looks for its own parameters at %r1. */
11599 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11601 insn = emit_jump_insn (tmp);
11602 JUMP_LABEL (insn) = call_done;
11603 LABEL_NUSES (call_done)++;
11605 /* Mark the jump as very unlikely to be taken. */
11606 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11608 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11610 /* If va_start is used, and __morestack was not called, just use
11612 emit_move_insn (r1,
11613 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11614 GEN_INT (STACK_POINTER_OFFSET)));
11619 tmp = gen_split_stack_call (morestack_ref, call_done);
11620 insn = emit_jump_insn (tmp);
11621 JUMP_LABEL (insn) = call_done;
11622 LABEL_NUSES (call_done)++;
11626 /* __morestack will call us here. */
11628 emit_label (call_done);
11631 /* We may have to tell the dataflow pass that the split stack prologue
11632 is initializing a register. */
11635 s390_live_on_entry (bitmap regs)
11637 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11639 gcc_assert (flag_split_stack);
11640 bitmap_set_bit (regs, 1);
11644 /* Return true if the function can use simple_return to return outside
11645 of a shrink-wrapped region. At present shrink-wrapping is supported
11649 s390_can_use_simple_return_insn (void)
11654 /* Return true if the epilogue is guaranteed to contain only a return
11655 instruction and if a direct return can therefore be used instead.
11656 One of the main advantages of using direct return instructions
11657 is that we can then use conditional returns. */
11660 s390_can_use_return_insn (void)
11664 if (!reload_completed)
11670 if (TARGET_TPF_PROFILING)
11673 for (i = 0; i < 16; i++)
11674 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11677 /* For 31 bit this is not covered by the frame_size check below
11678 since f4, f6 are saved in the register save area without needing
11679 additional stack space. */
11681 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11684 if (cfun->machine->base_reg
11685 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11688 return cfun_frame_layout.frame_size == 0;
11691 /* The VX ABI differs for vararg functions. Therefore we need the
11692 prototype of the callee to be available when passing vector type
11694 static const char *
11695 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11697 return ((TARGET_VX_ABI
11699 && VECTOR_TYPE_P (TREE_TYPE (val))
11700 && (funcdecl == NULL_TREE
11701 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11702 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11703 ? N_("vector argument passed to unprototyped function")
11708 /* Return the size in bytes of a function argument of
11709 type TYPE and/or mode MODE. At least one of TYPE or
11710 MODE must be specified. */
11713 s390_function_arg_size (machine_mode mode, const_tree type)
11716 return int_size_in_bytes (type);
11718 /* No type info available for some library calls ... */
11719 if (mode != BLKmode)
11720 return GET_MODE_SIZE (mode);
11722 /* If we have neither type nor mode, abort. */
11723 gcc_unreachable ();
11726 /* Return true if a function argument of type TYPE and mode MODE
11727 is to be passed in a vector register, if available. */
11730 s390_function_arg_vector (machine_mode mode, const_tree type)
11732 if (!TARGET_VX_ABI)
11735 if (s390_function_arg_size (mode, type) > 16)
11738 /* No type info available for some library calls ... */
11740 return VECTOR_MODE_P (mode);
11742 /* The ABI says that record types with a single member are treated
11743 just like that member would be. */
11744 while (TREE_CODE (type) == RECORD_TYPE)
11746 tree field, single = NULL_TREE;
11748 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11750 if (TREE_CODE (field) != FIELD_DECL)
11753 if (single == NULL_TREE)
11754 single = TREE_TYPE (field);
11759 if (single == NULL_TREE)
11763 /* If the field declaration adds extra bytes due to
11764 e.g. padding, this is not accepted as a vector type. */
11765 if (int_size_in_bytes (single) <= 0
11766 || int_size_in_bytes (single) != int_size_in_bytes (type))
11772 return VECTOR_TYPE_P (type);
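/* Example (illustrative, hypothetical types): with the vector ABI the
   single-member rule means

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrap { v4si x; };

   is passed exactly like a bare v4si, while a struct whose size
   exceeds that of its only member, e.g.

     struct pad { v4si x; } __attribute__ ((aligned (32)));

   fails the size comparison above and is not passed as a vector.  */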
11775 /* Return true if a function argument of type TYPE and mode MODE
11776 is to be passed in a floating-point register, if available. */
11779 s390_function_arg_float (machine_mode mode, const_tree type)
11781 if (s390_function_arg_size (mode, type) > 8)
11784 /* Soft-float changes the ABI: no floating-point registers are used. */
11785 if (TARGET_SOFT_FLOAT)
11788 /* No type info available for some library calls ... */
11790 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11792 /* The ABI says that record types with a single member are treated
11793 just like that member would be. */
11794 while (TREE_CODE (type) == RECORD_TYPE)
11796 tree field, single = NULL_TREE;
11798 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11800 if (TREE_CODE (field) != FIELD_DECL)
11803 if (single == NULL_TREE)
11804 single = TREE_TYPE (field);
11809 if (single == NULL_TREE)
11815 return TREE_CODE (type) == REAL_TYPE;
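/* Example (illustrative): by the same single-member rule

     struct s1 { double d; };
     struct s2 { struct s1 s; };

   are both passed in an FPR like a bare double (the rule applies
   recursively), whereas a two-field struct such as

     struct s3 { float a; float b; };

   is not unwrapped and never reaches an FPR.  */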
11818 /* Return true if a function argument of type TYPE and mode MODE
11819 is to be passed in an integer register, or a pair of integer
11820 registers, if available. */
11823 s390_function_arg_integer (machine_mode mode, const_tree type)
11825 int size = s390_function_arg_size (mode, type);
11829 /* No type info available for some library calls ... */
11831 return GET_MODE_CLASS (mode) == MODE_INT
11832 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11834 /* We accept small integral (and similar) types. */
11835 if (INTEGRAL_TYPE_P (type)
11836 || POINTER_TYPE_P (type)
11837 || TREE_CODE (type) == NULLPTR_TYPE
11838 || TREE_CODE (type) == OFFSET_TYPE
11839 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11842 /* We also accept structs of size 1, 2, 4, 8 that are not
11843 passed in floating-point registers. */
11844 if (AGGREGATE_TYPE_P (type)
11845 && exact_log2 (size) >= 0
11846 && !s390_function_arg_float (mode, type))
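/* Example (illustrative): sizes 1, 2, 4 and 8 pass the exact_log2
   test, so

     struct s4 { short a; short b; };
     struct s8 { int a; int b; };
     struct s6 { short a; int b; };

   all go to GPRs (s6 because padding rounds it up to 8 bytes; on
   31 bit an 8-byte aggregate takes a register pair), while a 3-, 5-,
   6- or 7-byte aggregate is rejected here and ends up being passed
   by reference.  */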
11852 /* Return 1 if a function argument of type TYPE and mode MODE
11853 is to be passed by reference. The ABI specifies that only
11854 structures of size 1, 2, 4, or 8 bytes are passed by value,
11855 all other structures (and complex numbers) are passed by
11859 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11860 machine_mode mode, const_tree type,
11861 bool named ATTRIBUTE_UNUSED)
11863 int size = s390_function_arg_size (mode, type);
11865 if (s390_function_arg_vector (mode, type))
11873 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11876 if (TREE_CODE (type) == COMPLEX_TYPE
11877 || TREE_CODE (type) == VECTOR_TYPE)
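/* Example (illustrative consequences of the checks above):

     struct ok  { long l; };
     struct big { char c[12]; };
     _Complex double z;

   ok is 8 bytes with a power-of-two size and is passed by value; big
   (12 bytes) and z (complex) are passed by reference.  Vector
   arguments accepted by s390_function_arg_vector are exempted up
   front and travel in vector registers instead.  */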
11884 /* Update the data in CUM to advance over an argument of mode MODE and
11885 data type TYPE. (TYPE is null for libcalls where that information
11886 may not be available.) The boolean NAMED specifies whether the
11887 argument is a named argument (as opposed to an unnamed argument
11888 matching an ellipsis). */
11891 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11892 const_tree type, bool named)
11894 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11896 if (s390_function_arg_vector (mode, type))
11898 /* We are called for unnamed vector stdarg arguments which are
11899 passed on the stack. In this case this hook does not have to
11900 do anything since stack arguments are tracked by common
11906 else if (s390_function_arg_float (mode, type))
11910 else if (s390_function_arg_integer (mode, type))
11912 int size = s390_function_arg_size (mode, type);
11913 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11916 gcc_unreachable ();
11919 /* Define where to put the arguments to a function.
11920 Value is zero to push the argument on the stack,
11921 or a hard register in which to store the argument.
11923 MODE is the argument's machine mode.
11924 TYPE is the data type of the argument (as a tree).
11925 This is null for libcalls where that information may
11927 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11928 the preceding args and about the function being called.
11929 NAMED is nonzero if this argument is a named parameter
11930 (otherwise it is an extra parameter matching an ellipsis).
11932 On S/390, we use general purpose registers 2 through 6 to
11933 pass integer, pointer, and certain structure arguments, and
11934 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11935 to pass floating point arguments. All remaining arguments
11936 are pushed to the stack. */
11939 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11940 const_tree type, bool named)
11942 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11945 s390_check_type_for_vector_abi (type, true, false);
11947 if (s390_function_arg_vector (mode, type))
11949 /* Vector arguments being part of the ellipsis are passed on the
11951 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11954 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11956 else if (s390_function_arg_float (mode, type))
11958 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11961 return gen_rtx_REG (mode, cum->fprs + 16);
11963 else if (s390_function_arg_integer (mode, type))
11965 int size = s390_function_arg_size (mode, type);
11966 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11968 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11970 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11971 return gen_rtx_REG (mode, cum->gprs + 2);
11972 else if (n_gprs == 2)
11974 rtvec p = rtvec_alloc (2);
11977 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11980 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11983 return gen_rtx_PARALLEL (mode, p);
11987 /* After the real arguments, expand_call calls us once again
11988 with a void_type_node type. Whatever we return here is
11989 passed as operand 2 to the call expanders.
11991 We don't need this feature ... */
11992 else if (type == void_type_node)
11995 gcc_unreachable ();
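/* Example (illustrative, 64 bit with the vector ABI): for

     typedef int v4si __attribute__ ((vector_size (16)));
     void f (int a, double d, long b, v4si v);

   the hooks above assign a to %r2, d to %f0, b to %r3 and v to %v24.
   GPRs and FPRs/VRs are counted independently, which is why b still
   gets %r3 although d was passed in between.  */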
11998 /* Return true if return values of type TYPE should be returned
11999 in a memory buffer whose address is passed by the caller as
12000 hidden first argument. */
12003 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12005 /* We accept small integral (and similar) types. */
12006 if (INTEGRAL_TYPE_P (type)
12007 || POINTER_TYPE_P (type)
12008 || TREE_CODE (type) == OFFSET_TYPE
12009 || TREE_CODE (type) == REAL_TYPE)
12010 return int_size_in_bytes (type) > 8;
12012 /* vector types which fit into a VR. */
12014 && VECTOR_TYPE_P (type)
12015 && int_size_in_bytes (type) <= 16)
12018 /* Aggregates and similar constructs are always returned
12020 if (AGGREGATE_TYPE_P (type)
12021 || TREE_CODE (type) == COMPLEX_TYPE
12022 || VECTOR_TYPE_P (type))
12025 /* ??? We get called on all sorts of random stuff from
12026 aggregate_value_p. We can't abort, but it's not clear
12027 what's safe to return. Pretend it's a struct I guess. */
12031 /* Function arguments and return values are promoted to word size. */
12033 static machine_mode
12034 s390_promote_function_mode (const_tree type, machine_mode mode,
12036 const_tree fntype ATTRIBUTE_UNUSED,
12037 int for_return ATTRIBUTE_UNUSED)
12039 if (INTEGRAL_MODE_P (mode)
12040 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12042 if (type != NULL_TREE && POINTER_TYPE_P (type))
12043 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12050 /* Define where to return a (scalar) value of type RET_TYPE.
12051 If RET_TYPE is null, define where to return a (scalar)
12052 value of mode MODE from a libcall. */
12055 s390_function_and_libcall_value (machine_mode mode,
12056 const_tree ret_type,
12057 const_tree fntype_or_decl,
12058 bool outgoing ATTRIBUTE_UNUSED)
12060 /* For vector return types it is important to use the RET_TYPE
12061 argument whenever available since the middle-end might have
12062 changed the mode to a scalar mode. */
12063 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12064 || (!ret_type && VECTOR_MODE_P (mode)));
12066 /* For normal functions perform the promotion as
12067 promote_function_mode would do. */
12070 int unsignedp = TYPE_UNSIGNED (ret_type);
12071 mode = promote_function_mode (ret_type, mode, &unsignedp,
12072 fntype_or_decl, 1);
12075 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12076 || SCALAR_FLOAT_MODE_P (mode)
12077 || (TARGET_VX_ABI && vector_ret_type_p));
12078 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12080 if (TARGET_VX_ABI && vector_ret_type_p)
12081 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12082 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12083 return gen_rtx_REG (mode, 16);
12084 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12085 || UNITS_PER_LONG == UNITS_PER_WORD)
12086 return gen_rtx_REG (mode, 2);
12087 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12089 /* This case is triggered when returning a 64 bit value with
12090 -m31 -mzarch. Although the value would fit into a single
12091 register it has to be forced into a 32 bit register pair in
12092 order to match the ABI. */
12093 rtvec p = rtvec_alloc (2);
12096 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12098 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12100 return gen_rtx_PARALLEL (mode, p);
12103 gcc_unreachable ();
12106 /* Define where to return a scalar return value of type RET_TYPE. */
12109 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12112 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12113 fn_decl_or_type, outgoing);
12116 /* Define where to return a scalar libcall return value of mode
12120 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12122 return s390_function_and_libcall_value (mode, NULL_TREE,
12127 /* Create and return the va_list datatype.
12129 On S/390, va_list is an array type equivalent to
12131 typedef struct __va_list_tag
12135 void *__overflow_arg_area;
12136 void *__reg_save_area;
12139 where __gpr and __fpr hold the number of general purpose
12140 or floating point arguments used up to now, respectively,
12141 __overflow_arg_area points to the stack location of the
12142 next argument passed on the stack, and __reg_save_area
12143 always points to the start of the register area in the
12144 call frame of the current function. The function prologue
12145 saves all registers used for argument passing into this
12146 area if the function uses variable arguments. */
12149 s390_build_builtin_va_list (void)
12151 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12153 record = lang_hooks.types.make_type (RECORD_TYPE);
12156 build_decl (BUILTINS_LOCATION,
12157 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12159 f_gpr = build_decl (BUILTINS_LOCATION,
12160 FIELD_DECL, get_identifier ("__gpr"),
12161 long_integer_type_node);
12162 f_fpr = build_decl (BUILTINS_LOCATION,
12163 FIELD_DECL, get_identifier ("__fpr"),
12164 long_integer_type_node);
12165 f_ovf = build_decl (BUILTINS_LOCATION,
12166 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12168 f_sav = build_decl (BUILTINS_LOCATION,
12169 FIELD_DECL, get_identifier ("__reg_save_area"),
12172 va_list_gpr_counter_field = f_gpr;
12173 va_list_fpr_counter_field = f_fpr;
12175 DECL_FIELD_CONTEXT (f_gpr) = record;
12176 DECL_FIELD_CONTEXT (f_fpr) = record;
12177 DECL_FIELD_CONTEXT (f_ovf) = record;
12178 DECL_FIELD_CONTEXT (f_sav) = record;
12180 TYPE_STUB_DECL (record) = type_decl;
12181 TYPE_NAME (record) = type_decl;
12182 TYPE_FIELDS (record) = f_gpr;
12183 DECL_CHAIN (f_gpr) = f_fpr;
12184 DECL_CHAIN (f_fpr) = f_ovf;
12185 DECL_CHAIN (f_ovf) = f_sav;
12187 layout_type (record);
12189 /* The correct type is an array type of one element. */
12190 return build_array_type (record, build_index_type (size_zero_node));
12193 /* Implement va_start by filling the va_list structure VALIST.
12194 STDARG_P is always true, and ignored.
12195 NEXTARG points to the first anonymous stack argument.
12197 The following global variables are used to initialize
12198 the va_list structure:
12201 holds number of gprs and fprs used for named arguments.
12202 crtl->args.arg_offset_rtx:
12203 holds the offset of the first anonymous stack argument
12204 (relative to the virtual arg pointer). */
12207 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12209 HOST_WIDE_INT n_gpr, n_fpr;
12211 tree f_gpr, f_fpr, f_ovf, f_sav;
12212 tree gpr, fpr, ovf, sav, t;
12214 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12215 f_fpr = DECL_CHAIN (f_gpr);
12216 f_ovf = DECL_CHAIN (f_fpr);
12217 f_sav = DECL_CHAIN (f_ovf);
12219 valist = build_simple_mem_ref (valist);
12220 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12221 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12222 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12223 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12225 /* Count number of gp and fp argument registers used. */
12227 n_gpr = crtl->args.info.gprs;
12228 n_fpr = crtl->args.info.fprs;
12230 if (cfun->va_list_gpr_size)
12232 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12233 build_int_cst (NULL_TREE, n_gpr));
12234 TREE_SIDE_EFFECTS (t) = 1;
12235 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12238 if (cfun->va_list_fpr_size)
12240 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12241 build_int_cst (NULL_TREE, n_fpr));
12242 TREE_SIDE_EFFECTS (t) = 1;
12243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12246 if (flag_split_stack
12247 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12249 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12254 reg = gen_reg_rtx (Pmode);
12255 cfun->machine->split_stack_varargs_pointer = reg;
12258 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12259 seq = get_insns ();
12262 push_topmost_sequence ();
12263 emit_insn_after (seq, entry_of_function ());
12264 pop_topmost_sequence ();
12267 /* Find the overflow area.
12268 FIXME: This currently is too pessimistic when the vector ABI is
12269 enabled. In that case we *always* set up the overflow area
12271 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12272 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12275 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12276 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12278 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12280 off = INTVAL (crtl->args.arg_offset_rtx);
12281 off = off < 0 ? 0 : off;
12282 if (TARGET_DEBUG_ARG)
12283 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12284 (int)n_gpr, (int)n_fpr, off);
12286 t = fold_build_pointer_plus_hwi (t, off);
12288 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12289 TREE_SIDE_EFFECTS (t) = 1;
12290 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12293 /* Find the register save area. */
12294 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12295 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12297 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12298 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12300 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12301 TREE_SIDE_EFFECTS (t) = 1;
12302 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
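/* Illustrative sketch of what the expansion above computes, written
   in terms of the __va_list_tag fields:

     va->__gpr = n_gpr;
     va->__fpr = n_fpr;
     va->__overflow_arg_area = arg_ptr + MAX (arg_offset, 0);
     va->__reg_save_area = ra_ptr - RETURN_REGNUM * UNITS_PER_LONG;

   where arg_ptr is the incoming-args pointer (or the -fsplit-stack
   varargs pointer) and ra_ptr the virtual return-address pointer.
   Each assignment is only emitted when the corresponding area can
   actually be needed.  */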
12306 /* Implement va_arg by updating the va_list structure
12307 VALIST as required to retrieve an argument of type
12308 TYPE, and returning that argument.
12310 Generates code equivalent to:
12312 if (integral value) {
12313 if (size <= 4 && args.gpr < 5 ||
12314 size > 4 && args.gpr < 4 )
12315 ret = args.reg_save_area[args.gpr+8]
12317 ret = *args.overflow_arg_area++;
12318 } else if (vector value) {
12319 ret = *args.overflow_arg_area;
12320 args.overflow_arg_area += size / 8;
12321 } else if (float value) {
12323 ret = args.reg_save_area[args.fpr+64]
12325 ret = *args.overflow_arg_area++;
12326 } else if (aggregate value) {
12328 ret = *args.reg_save_area[args.gpr]
12330 ret = **args.overflow_arg_area++;
12334 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12335 gimple_seq *post_p ATTRIBUTE_UNUSED)
12337 tree f_gpr, f_fpr, f_ovf, f_sav;
12338 tree gpr, fpr, ovf, sav, reg, t, u;
12339 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12340 tree lab_false, lab_over = NULL_TREE;
12341 tree addr = create_tmp_var (ptr_type_node, "addr");
12342 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12345 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12346 f_fpr = DECL_CHAIN (f_gpr);
12347 f_ovf = DECL_CHAIN (f_fpr);
12348 f_sav = DECL_CHAIN (f_ovf);
12350 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12351 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12352 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12354 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12355 both appear on a lhs. */
12356 valist = unshare_expr (valist);
12357 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12359 size = int_size_in_bytes (type);
12361 s390_check_type_for_vector_abi (type, true, false);
12363 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12365 if (TARGET_DEBUG_ARG)
12367 fprintf (stderr, "va_arg: aggregate type");
12371 /* Aggregates are passed by reference. */
12376 /* kernel stack layout on 31 bit: It is assumed here that no padding
12377 will be added by s390_frame_info because for va_args an even
12378 number of gprs always has to be saved (r15-r2 = 14 regs). */
12379 sav_ofs = 2 * UNITS_PER_LONG;
12380 sav_scale = UNITS_PER_LONG;
12381 size = UNITS_PER_LONG;
12382 max_reg = GP_ARG_NUM_REG - n_reg;
12383 left_align_p = false;
12385 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12387 if (TARGET_DEBUG_ARG)
12389 fprintf (stderr, "va_arg: vector type");
12399 left_align_p = true;
12401 else if (s390_function_arg_float (TYPE_MODE (type), type))
12403 if (TARGET_DEBUG_ARG)
12405 fprintf (stderr, "va_arg: float type");
12409 /* FP args go in FP registers, if present. */
12413 sav_ofs = 16 * UNITS_PER_LONG;
12415 max_reg = FP_ARG_NUM_REG - n_reg;
12416 left_align_p = false;
12420 if (TARGET_DEBUG_ARG)
12422 fprintf (stderr, "va_arg: other type");
12426 /* Otherwise into GP registers. */
12429 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12431 /* kernel stack layout on 31 bit: It is assumed here that no padding
12432 will be added by s390_frame_info because for va_args an even
12433 number of gprs always has to be saved (r15-r2 = 14 regs). */
12434 sav_ofs = 2 * UNITS_PER_LONG;
12436 if (size < UNITS_PER_LONG)
12437 sav_ofs += UNITS_PER_LONG - size;
12439 sav_scale = UNITS_PER_LONG;
12440 max_reg = GP_ARG_NUM_REG - n_reg;
12441 left_align_p = false;
12444 /* Pull the value out of the saved registers ... */
12446 if (reg != NULL_TREE)
12449 /* if (reg > ((typeof (reg))max_reg))
12452 addr = sav + sav_ofs + reg * save_scale; */
12459 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12460 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12462 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12463 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12464 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12465 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12466 gimplify_and_add (t, pre_p);
12468 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12469 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12470 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12471 t = fold_build_pointer_plus (t, u);
12473 gimplify_assign (addr, t, pre_p);
12475 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12477 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12480 /* ... Otherwise out of the overflow area. */
12483 if (size < UNITS_PER_LONG && !left_align_p)
12484 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12486 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12488 gimplify_assign (addr, t, pre_p);
12490 if (size < UNITS_PER_LONG && left_align_p)
12491 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12493 t = fold_build_pointer_plus_hwi (t, size);
12495 gimplify_assign (ovf, t, pre_p);
12497 if (reg != NULL_TREE)
12498 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12501 /* Increment register save count. */
12505 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12506 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12507 gimplify_and_add (u, pre_p);
12512 t = build_pointer_type_for_mode (build_pointer_type (type),
12514 addr = fold_convert (t, addr);
12515 addr = build_va_arg_indirect_ref (addr);
12519 t = build_pointer_type_for_mode (type, ptr_mode, true);
12520 addr = fold_convert (t, addr);
12523 return build_va_arg_indirect_ref (addr);
12526 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12528 DEST - Register location where CC will be stored.
12529 TDB - Pointer to a 256 byte area where to store the transaction
12530 diagnostic block. NULL if TDB is not needed.
12531 RETRY - Retry count value. If non-NULL a retry loop for CC2
12533 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12534 of the tbegin instruction pattern. */
12537 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12539 rtx retry_plus_two = gen_reg_rtx (SImode);
12540 rtx retry_reg = gen_reg_rtx (SImode);
12541 rtx_code_label *retry_label = NULL;
12543 if (retry != NULL_RTX)
12545 emit_move_insn (retry_reg, retry);
12546 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12547 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12548 retry_label = gen_label_rtx ();
12549 emit_label (retry_label);
12552 if (clobber_fprs_p)
12555 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12558 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12562 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12565 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12566 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12568 UNSPEC_CC_TO_INT));
12569 if (retry != NULL_RTX)
12571 const int CC0 = 1 << 3;
12572 const int CC1 = 1 << 2;
12573 const int CC3 = 1 << 0;
12575 rtx count = gen_reg_rtx (SImode);
12576 rtx_code_label *leave_label = gen_label_rtx ();
12578 /* Exit for success and permanent failures. */
12579 jump = s390_emit_jump (leave_label,
12580 gen_rtx_EQ (VOIDmode,
12581 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12582 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12583 LABEL_NUSES (leave_label) = 1;
12585 /* CC2 - transient failure. Perform retry with ppa. */
12586 emit_move_insn (count, retry_plus_two);
12587 emit_insn (gen_subsi3 (count, count, retry_reg));
12588 emit_insn (gen_tx_assist (count));
12589 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12592 JUMP_LABEL (jump) = retry_label;
12593 LABEL_NUSES (retry_label) = 1;
12594 emit_label (leave_label);
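/* Rough C-level picture of the retry loop built above, using the
   documented transactional-execution builtins; CC 2 indicates a
   transient abort, and the emitted rtl differs in scheduling details:

     int cc, remaining = retry + 1;
     do
       {
         cc = __builtin_tbegin (tdb);
         if (cc != 2)
           break;
         __builtin_tx_assist (retry + 2 - remaining);
       }
     while (--remaining);
*/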
12599 /* Return the decl for the target specific builtin with the function
12603 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12605 if (fcode >= S390_BUILTIN_MAX)
12606 return error_mark_node;
12608 return s390_builtin_decls[fcode];
12611 /* We call mcount before the function prologue. So a profiled leaf
12612 function should stay a leaf function. */
12615 s390_keep_leaf_when_profiled ()
12620 /* Output assembly code for the trampoline template to
12623 On S/390, we use gpr 1 internally in the trampoline code;
12624 gpr 0 is used to hold the static chain. */
12627 s390_asm_trampoline_template (FILE *file)
12630 op[0] = gen_rtx_REG (Pmode, 0);
12631 op[1] = gen_rtx_REG (Pmode, 1);
12635 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12636 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12637 output_asm_insn ("br\t%1", op); /* 2 byte */
12638 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12642 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12643 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12644 output_asm_insn ("br\t%1", op); /* 2 byte */
12645 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12649 /* Emit RTL insns to initialize the variable parts of a trampoline.
12650 FNADDR is an RTX for the address of the function's pure code.
12651 CXT is an RTX for the static chain value for the function. */
12654 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12656 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12659 emit_block_move (m_tramp, assemble_trampoline_template (),
12660 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12662 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12663 emit_move_insn (mem, cxt);
12664 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12665 emit_move_insn (mem, fnaddr);
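/* Illustrative layout of the initialized 64-bit trampoline:

     offset  0:  basr %r1,0            r1 = address of offset 2
     offset  2:  lmg  %r0,%r1,14(%r1)  r0 = chain, r1 = target
     offset  8:  br   %r1
     offset 16:  static chain value    written at 2 * UNITS_PER_LONG
     offset 24:  function address      written at 3 * UNITS_PER_LONG

   The lmg displacement of 14 is applied to %r1, which holds offset 2,
   so the two loads read the pointers at offsets 16 and 24.  */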
12668 /* Output assembler code to FILE to increment profiler label # LABELNO
12669 for profiling a function entry. */
12672 s390_function_profiler (FILE *file, int labelno)
12677 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12679 fprintf (file, "# function profiler \n");
12681 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12682 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12683 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12685 op[2] = gen_rtx_REG (Pmode, 1);
12686 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12687 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12689 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12692 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12693 op[4] = gen_rtx_CONST (Pmode, op[4]);
12698 output_asm_insn ("stg\t%0,%1", op);
12699 output_asm_insn ("larl\t%2,%3", op);
12700 output_asm_insn ("brasl\t%0,%4", op);
12701 output_asm_insn ("lg\t%0,%1", op);
12703 else if (TARGET_CPU_ZARCH)
12705 output_asm_insn ("st\t%0,%1", op);
12706 output_asm_insn ("larl\t%2,%3", op);
12707 output_asm_insn ("brasl\t%0,%4", op);
12708 output_asm_insn ("l\t%0,%1", op);
12710 else if (!flag_pic)
12712 op[6] = gen_label_rtx ();
12714 output_asm_insn ("st\t%0,%1", op);
12715 output_asm_insn ("bras\t%2,%l6", op);
12716 output_asm_insn (".long\t%4", op);
12717 output_asm_insn (".long\t%3", op);
12718 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12719 output_asm_insn ("l\t%0,0(%2)", op);
12720 output_asm_insn ("l\t%2,4(%2)", op);
12721 output_asm_insn ("basr\t%0,%0", op);
12722 output_asm_insn ("l\t%0,%1", op);
12726 op[5] = gen_label_rtx ();
12727 op[6] = gen_label_rtx ();
12729 output_asm_insn ("st\t%0,%1", op);
12730 output_asm_insn ("bras\t%2,%l6", op);
12731 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12732 output_asm_insn (".long\t%4-%l5", op);
12733 output_asm_insn (".long\t%3-%l5", op);
12734 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12735 output_asm_insn ("lr\t%0,%2", op);
12736 output_asm_insn ("a\t%0,0(%2)", op);
12737 output_asm_insn ("a\t%2,4(%2)", op);
12738 output_asm_insn ("basr\t%0,%0", op);
12739 output_asm_insn ("l\t%0,%1", op);
12743 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12744 into its SYMBOL_REF_FLAGS. */
12747 s390_encode_section_info (tree decl, rtx rtl, int first)
12749 default_encode_section_info (decl, rtl, first);
12751 if (TREE_CODE (decl) == VAR_DECL)
12753 /* Store the alignment to be able to check if we can use
12754 a larl/load-relative instruction. We only handle the cases
12755 that can go wrong (i.e. no FUNC_DECLs). */
12756 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12757 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12758 else if (DECL_ALIGN (decl) % 32)
12759 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12760 else if (DECL_ALIGN (decl) % 64)
12761 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12764 /* Literal pool references don't have a decl so they are handled
12765 differently here. We rely on the information in the MEM_ALIGN
12766 entry to decide upon the alignment. */
12768 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12769 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12771 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12772 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12773 else if (MEM_ALIGN (rtl) % 32)
12774 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12775 else if (MEM_ALIGN (rtl) % 64)
12776 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
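/* Example (illustrative): larl and the load/store-relative
   instructions require even addresses, so a plain

     char c;

   with only byte alignment gets SYMBOL_FLAG_NOTALIGN2 here and will
   not be addressed through larl-based sequences.  */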
12780 /* Output thunk to FILE that implements a C++ virtual function call (with
12781 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12782 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12783 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12784 relative to the resulting this pointer. */
12787 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12788 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12794 /* Make sure unwind info is emitted for the thunk if needed. */
12795 final_start_function (emit_barrier (), file, 1);
12797 /* Operand 0 is the target function. */
12798 op[0] = XEXP (DECL_RTL (function), 0);
12799 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12802 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12803 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12804 op[0] = gen_rtx_CONST (Pmode, op[0]);
12807 /* Operand 1 is the 'this' pointer. */
12808 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12809 op[1] = gen_rtx_REG (Pmode, 3);
12811 op[1] = gen_rtx_REG (Pmode, 2);
12813 /* Operand 2 is the delta. */
12814 op[2] = GEN_INT (delta);
12816 /* Operand 3 is the vcall_offset. */
12817 op[3] = GEN_INT (vcall_offset);
12819 /* Operand 4 is the temporary register. */
12820 op[4] = gen_rtx_REG (Pmode, 1);
12822 /* Operands 5 to 8 can be used as labels. */
12828 /* Operand 9 can be used for temporary register. */
12831 /* Generate code. */
12834 /* Setup literal pool pointer if required. */
12835 if ((!DISP_IN_RANGE (delta)
12836 && !CONST_OK_FOR_K (delta)
12837 && !CONST_OK_FOR_Os (delta))
12838 || (!DISP_IN_RANGE (vcall_offset)
12839 && !CONST_OK_FOR_K (vcall_offset)
12840 && !CONST_OK_FOR_Os (vcall_offset)))
12842 op[5] = gen_label_rtx ();
12843 output_asm_insn ("larl\t%4,%5", op);
12846 /* Add DELTA to this pointer. */
12849 if (CONST_OK_FOR_J (delta))
12850 output_asm_insn ("la\t%1,%2(%1)", op);
12851 else if (DISP_IN_RANGE (delta))
12852 output_asm_insn ("lay\t%1,%2(%1)", op);
12853 else if (CONST_OK_FOR_K (delta))
12854 output_asm_insn ("aghi\t%1,%2", op);
12855 else if (CONST_OK_FOR_Os (delta))
12856 output_asm_insn ("agfi\t%1,%2", op);
12859 op[6] = gen_label_rtx ();
12860 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12864 /* Perform vcall adjustment. */
12867 if (DISP_IN_RANGE (vcall_offset))
12869 output_asm_insn ("lg\t%4,0(%1)", op);
12870 output_asm_insn ("ag\t%1,%3(%4)", op);
12872 else if (CONST_OK_FOR_K (vcall_offset))
12874 output_asm_insn ("lghi\t%4,%3", op);
12875 output_asm_insn ("ag\t%4,0(%1)", op);
12876 output_asm_insn ("ag\t%1,0(%4)", op);
12878 else if (CONST_OK_FOR_Os (vcall_offset))
12880 output_asm_insn ("lgfi\t%4,%3", op);
12881 output_asm_insn ("ag\t%4,0(%1)", op);
12882 output_asm_insn ("ag\t%1,0(%4)", op);
12886 op[7] = gen_label_rtx ();
12887 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12888 output_asm_insn ("ag\t%4,0(%1)", op);
12889 output_asm_insn ("ag\t%1,0(%4)", op);
12893 /* Jump to target. */
12894 output_asm_insn ("jg\t%0", op);
12896 /* Output literal pool if required. */
12899 output_asm_insn (".align\t4", op);
12900 targetm.asm_out.internal_label (file, "L",
12901 CODE_LABEL_NUMBER (op[5]));
12905 targetm.asm_out.internal_label (file, "L",
12906 CODE_LABEL_NUMBER (op[6]));
12907 output_asm_insn (".long\t%2", op);
12911 targetm.asm_out.internal_label (file, "L",
12912 CODE_LABEL_NUMBER (op[7]));
12913 output_asm_insn (".long\t%3", op);
12918 /* Setup base pointer if required. */
12920 || (!DISP_IN_RANGE (delta)
12921 && !CONST_OK_FOR_K (delta)
12922 && !CONST_OK_FOR_Os (delta))
12923 || (!DISP_IN_RANGE (vcall_offset)
12924 && !CONST_OK_FOR_K (vcall_offset)
12925 && !CONST_OK_FOR_Os (vcall_offset)))
12927 op[5] = gen_label_rtx ();
12928 output_asm_insn ("basr\t%4,0", op);
12929 targetm.asm_out.internal_label (file, "L",
12930 CODE_LABEL_NUMBER (op[5]));
12933 /* Add DELTA to this pointer. */
12936 if (CONST_OK_FOR_J (delta))
12937 output_asm_insn ("la\t%1,%2(%1)", op);
12938 else if (DISP_IN_RANGE (delta))
12939 output_asm_insn ("lay\t%1,%2(%1)", op);
12940 else if (CONST_OK_FOR_K (delta))
12941 output_asm_insn ("ahi\t%1,%2", op);
12942 else if (CONST_OK_FOR_Os (delta))
12943 output_asm_insn ("afi\t%1,%2", op);
12946 op[6] = gen_label_rtx ();
12947 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12951 /* Perform vcall adjustment. */
12954 if (CONST_OK_FOR_J (vcall_offset))
12956 output_asm_insn ("l\t%4,0(%1)", op);
12957 output_asm_insn ("a\t%1,%3(%4)", op);
12959 else if (DISP_IN_RANGE (vcall_offset))
12961 output_asm_insn ("l\t%4,0(%1)", op);
12962 output_asm_insn ("ay\t%1,%3(%4)", op);
12964 else if (CONST_OK_FOR_K (vcall_offset))
12966 output_asm_insn ("lhi\t%4,%3", op);
12967 output_asm_insn ("a\t%4,0(%1)", op);
12968 output_asm_insn ("a\t%1,0(%4)", op);
12970 else if (CONST_OK_FOR_Os (vcall_offset))
12972 output_asm_insn ("iilf\t%4,%3", op);
12973 output_asm_insn ("a\t%4,0(%1)", op);
12974 output_asm_insn ("a\t%1,0(%4)", op);
12978 op[7] = gen_label_rtx ();
12979 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12980 output_asm_insn ("a\t%4,0(%1)", op);
12981 output_asm_insn ("a\t%1,0(%4)", op);
12984 /* We had to clobber the base pointer register.
12985 Re-setup the base pointer (with a different base). */
12986 op[5] = gen_label_rtx ();
12987 output_asm_insn ("basr\t%4,0", op);
12988 targetm.asm_out.internal_label (file, "L",
12989 CODE_LABEL_NUMBER (op[5]));
12992 /* Jump to target. */
12993 op[8] = gen_label_rtx ();
12996 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12997 else if (!nonlocal)
12998 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12999 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13000 else if (flag_pic == 1)
13002 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13003 output_asm_insn ("l\t%4,%0(%4)", op);
13005 else if (flag_pic == 2)
13007 op[9] = gen_rtx_REG (Pmode, 0);
13008 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13009 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13010 output_asm_insn ("ar\t%4,%9", op);
13011 output_asm_insn ("l\t%4,0(%4)", op);
13014 output_asm_insn ("br\t%4", op);
13016 /* Output literal pool. */
13017 output_asm_insn (".align\t4", op);
13019 if (nonlocal && flag_pic == 2)
13020 output_asm_insn (".long\t%0", op);
13023 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13024 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13027 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13029 output_asm_insn (".long\t%0", op);
13031 output_asm_insn (".long\t%0-%5", op);
13035 targetm.asm_out.internal_label (file, "L",
13036 CODE_LABEL_NUMBER (op[6]));
13037 output_asm_insn (".long\t%2", op);
13041 targetm.asm_out.internal_label (file, "L",
13042 CODE_LABEL_NUMBER (op[7]));
13043 output_asm_insn (".long\t%3", op);
13046 final_end_function ();
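/* Illustrative summary, independent of the instruction selection
   above: every thunk emitted here performs

     this += delta;
     if (vcall_offset != 0)
       this += *(long *) (*(char **) this + vcall_offset);
     goto *function;

   with this in %r2, or in %r3 when the return value is passed through
   a hidden first argument.  */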
13050 s390_valid_pointer_mode (machine_mode mode)
13052 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13055 /* Checks whether the given CALL_EXPR would use a caller
13056 saved register. This is used to decide whether sibling call
13057 optimization could be performed on the respective function
13061 s390_call_saved_register_used (tree call_expr)
13063 CUMULATIVE_ARGS cum_v;
13064 cumulative_args_t cum;
13071 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13072 cum = pack_cumulative_args (&cum_v);
13074 for (i = 0; i < call_expr_nargs (call_expr); i++)
13076 parameter = CALL_EXPR_ARG (call_expr, i);
13077 gcc_assert (parameter);
13079 /* For an undeclared variable passed as parameter we will get
13080 an ERROR_MARK node here. */
13081 if (TREE_CODE (parameter) == ERROR_MARK)
13084 type = TREE_TYPE (parameter);
13087 mode = TYPE_MODE (type);
13090 /* We assume that in the target function all parameters are
13091 named. This only has an impact on vector argument register
13092 usage, and none of those registers are call-saved. */
13093 if (pass_by_reference (&cum_v, mode, type, true))
13096 type = build_pointer_type (type);
13099 parm_rtx = s390_function_arg (cum, mode, type, true);
13101 s390_function_arg_advance (cum, mode, type, true);
13106 if (REG_P (parm_rtx))
13109 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
13111 if (!call_used_regs[reg + REGNO (parm_rtx)])
13115 if (GET_CODE (parm_rtx) == PARALLEL)
13119 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13121 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13123 gcc_assert (REG_P (r));
13126 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
13128 if (!call_used_regs[reg + REGNO (r)])
13137 /* Return true if the given call expression can be
13138 turned into a sibling call.
13139 DECL holds the declaration of the function to be called whereas
13140 EXP is the call expression itself. */
13143 s390_function_ok_for_sibcall (tree decl, tree exp)
13145 /* The TPF epilogue uses register 1. */
13146 if (TARGET_TPF_PROFILING)
13149 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13150 which would have to be restored before the sibcall. */
13151 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13154 /* Register 6 on s390 is available as an argument register but unfortunately
13155 "caller saved". This makes functions needing this register for arguments
13156 not suitable for sibcalls. */
13157 return !s390_call_saved_register_used (exp);
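/* Example (illustrative): %r6 is the fifth GPR argument register but
   call-saved, so a tail call such as

     extern void g (long, long, long, long, long);
     void f (void) { g (1, 2, 3, 4, 5); }

   places its fifth argument in %r6 and is rejected here, while the
   same call with only four arguments may become a sibcall.  */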
13160 /* Return the fixed registers used for condition codes. */
13163 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13166 *p2 = INVALID_REGNUM;
13171 /* This function is used by the call expanders of the machine description.
13172 It emits the call insn itself together with the necessary operations
13173 to adjust the target address and returns the emitted insn.
13174 ADDR_LOCATION is the target address rtx
13175 TLS_CALL the location of the thread-local symbol
13176 RESULT_REG the register where the result of the call should be stored
13177 RETADDR_REG the register where the return address should be stored
13178 If this parameter is NULL_RTX the call is considered
13179 to be a sibling call. */
13182 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13185 bool plt_call = false;
13191 /* Direct function calls need special treatment. */
13192 if (GET_CODE (addr_location) == SYMBOL_REF)
13194 /* When calling a global routine in PIC mode, we must
13195 replace the symbol itself with the PLT stub. */
13196 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13198 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13200 addr_location = gen_rtx_UNSPEC (Pmode,
13201 gen_rtvec (1, addr_location),
13203 addr_location = gen_rtx_CONST (Pmode, addr_location);
13207 /* For -fpic code the PLT entries might use r12 which is
13208 call-saved. Therefore we cannot do a sibcall when
13209 calling directly using a symbol ref. When reaching
13210 this point we decided (in s390_function_ok_for_sibcall)
13211 to do a sibcall for a function pointer but one of the
13212 optimizers was able to get rid of the function pointer
13213 by propagating the symbol ref into the call. This
13214 optimization is illegal for S/390, so we turn the direct
13215 call into an indirect call again. */
13216 addr_location = force_reg (Pmode, addr_location);
13219 /* Unless we can use the bras(l) insn, force the
13220 routine address into a register. */
13221 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13224 addr_location = legitimize_pic_address (addr_location, 0);
13226 addr_location = force_reg (Pmode, addr_location);
13230 /* If it is already an indirect call or the code above moved the
13231 SYMBOL_REF to somewhere else make sure the address can be found in
13233 if (retaddr_reg == NULL_RTX
13234 && GET_CODE (addr_location) != SYMBOL_REF
13237 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13238 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13241 addr_location = gen_rtx_MEM (QImode, addr_location);
13242 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13244 if (result_reg != NULL_RTX)
13245 call = gen_rtx_SET (result_reg, call);
13247 if (retaddr_reg != NULL_RTX)
13249 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13251 if (tls_call != NULL_RTX)
13252 vec = gen_rtvec (3, call, clobber,
13253 gen_rtx_USE (VOIDmode, tls_call));
13255 vec = gen_rtvec (2, call, clobber);
13257 call = gen_rtx_PARALLEL (VOIDmode, vec);
13260 insn = emit_call_insn (call);
13262 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13263 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13265 /* s390_function_ok_for_sibcall should
13266 have denied sibcalls in this case. */
13267 gcc_assert (retaddr_reg != NULL_RTX);
13268 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13273 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13276 s390_conditional_register_usage (void)
13282 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13283 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13285 if (TARGET_CPU_ZARCH)
13287 fixed_regs[BASE_REGNUM] = 0;
13288 call_used_regs[BASE_REGNUM] = 0;
13289 fixed_regs[RETURN_REGNUM] = 0;
13290 call_used_regs[RETURN_REGNUM] = 0;
13294 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13295 call_used_regs[i] = call_really_used_regs[i] = 0;
13299 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13300 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13303 if (TARGET_SOFT_FLOAT)
13305 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13306 call_used_regs[i] = fixed_regs[i] = 1;
13309 /* Disable v16 - v31 for non-vector target. */
13312 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13313 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13317 /* Corresponding function to eh_return expander. */
13319 static GTY(()) rtx s390_tpf_eh_return_symbol;
13321 s390_emit_tpf_eh_return (rtx target)
13326 if (!s390_tpf_eh_return_symbol)
13327 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13329 reg = gen_rtx_REG (Pmode, 2);
13330 orig_ra = gen_rtx_REG (Pmode, 3);
13332 emit_move_insn (reg, target);
13333 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13334 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13335 gen_rtx_REG (Pmode, RETURN_REGNUM));
13336 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13337 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13339 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13342 /* Rework the prologue/epilogue to avoid saving/restoring
13343 registers unnecessarily. */
13346 s390_optimize_prologue (void)
13348 rtx_insn *insn, *new_insn, *next_insn;
13350 /* Do a final recompute of the frame-related data. */
13351 s390_optimize_register_info ();
13353 /* If all special registers are in fact used, there's nothing we
13354 can do, so no point in walking the insn list. */
13356 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13357 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13358 && (TARGET_CPU_ZARCH
13359 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13360 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13363 /* Search for prologue/epilogue insns and replace them. */
13365 for (insn = get_insns (); insn; insn = next_insn)
13367 int first, last, off;
13368 rtx set, base, offset;
13371 next_insn = NEXT_INSN (insn);
13373 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13376 pat = PATTERN (insn);
13378 /* Remove ldgr/lgdr instructions used for saving and restoring
13379 GPRs if possible. */
13384 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13385 tmp_pat = XVECEXP (pat, 0, 0);
13387 if (GET_CODE (tmp_pat) == SET
13388 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13389 && REG_P (SET_SRC (tmp_pat))
13390 && REG_P (SET_DEST (tmp_pat)))
13392 int src_regno = REGNO (SET_SRC (tmp_pat));
13393 int dest_regno = REGNO (SET_DEST (tmp_pat));
13397 if (!((GENERAL_REGNO_P (src_regno)
13398 && FP_REGNO_P (dest_regno))
13399 || (FP_REGNO_P (src_regno)
13400 && GENERAL_REGNO_P (dest_regno))))
13403 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13404 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13406 /* GPR must be call-saved, FPR must be call-clobbered. */
13407 if (!call_really_used_regs[fpr_regno]
13408 || call_really_used_regs[gpr_regno])
13411 /* It must not happen that what we once saved in an FPR now
13412 needs a stack slot. */
13413 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13415 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13417 remove_insn (insn);
13423 if (GET_CODE (pat) == PARALLEL
13424 && store_multiple_operation (pat, VOIDmode))
13426 set = XVECEXP (pat, 0, 0);
13427 first = REGNO (SET_SRC (set));
13428 last = first + XVECLEN (pat, 0) - 1;
13429 offset = const0_rtx;
13430 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13431 off = INTVAL (offset);
13433 if (GET_CODE (base) != REG || off < 0)
13435 if (cfun_frame_layout.first_save_gpr != -1
13436 && (cfun_frame_layout.first_save_gpr < first
13437 || cfun_frame_layout.last_save_gpr > last))
13439 if (REGNO (base) != STACK_POINTER_REGNUM
13440 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13442 if (first > BASE_REGNUM || last < BASE_REGNUM)
13445 if (cfun_frame_layout.first_save_gpr != -1)
13447 rtx s_pat = save_gprs (base,
13448 off + (cfun_frame_layout.first_save_gpr
13449 - first) * UNITS_PER_LONG,
13450 cfun_frame_layout.first_save_gpr,
13451 cfun_frame_layout.last_save_gpr);
13452 new_insn = emit_insn_before (s_pat, insn);
13453 INSN_ADDRESSES_NEW (new_insn, -1);
13456 remove_insn (insn);
13460 if (cfun_frame_layout.first_save_gpr == -1
13461 && GET_CODE (pat) == SET
13462 && GENERAL_REG_P (SET_SRC (pat))
13463 && GET_CODE (SET_DEST (pat)) == MEM)
13466 first = REGNO (SET_SRC (set));
13467 offset = const0_rtx;
13468 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13469 off = INTVAL (offset);
13471 if (GET_CODE (base) != REG || off < 0)
13473 if (REGNO (base) != STACK_POINTER_REGNUM
13474 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13477 remove_insn (insn);
13481 if (GET_CODE (pat) == PARALLEL
13482 && load_multiple_operation (pat, VOIDmode))
13484 set = XVECEXP (pat, 0, 0);
13485 first = REGNO (SET_DEST (set));
13486 last = first + XVECLEN (pat, 0) - 1;
13487 offset = const0_rtx;
13488 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13489 off = INTVAL (offset);
13491 if (GET_CODE (base) != REG || off < 0)
13494 if (cfun_frame_layout.first_restore_gpr != -1
13495 && (cfun_frame_layout.first_restore_gpr < first
13496 || cfun_frame_layout.last_restore_gpr > last))
13498 if (REGNO (base) != STACK_POINTER_REGNUM
13499 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13501 if (first > BASE_REGNUM || last < BASE_REGNUM)
13504 if (cfun_frame_layout.first_restore_gpr != -1)
13506 rtx rpat = restore_gprs (base,
13507 off + (cfun_frame_layout.first_restore_gpr
13508 - first) * UNITS_PER_LONG,
13509 cfun_frame_layout.first_restore_gpr,
13510 cfun_frame_layout.last_restore_gpr);
13512 /* Remove REG_CFA_RESTOREs for registers that we no
13513 longer need to save. */
13514 REG_NOTES (rpat) = REG_NOTES (insn);
13515 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13516 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13517 && ((int) REGNO (XEXP (*ptr, 0))
13518 < cfun_frame_layout.first_restore_gpr))
13519 *ptr = XEXP (*ptr, 1);
13521 ptr = &XEXP (*ptr, 1);
13522 new_insn = emit_insn_before (rpat, insn);
13523 RTX_FRAME_RELATED_P (new_insn) = 1;
13524 INSN_ADDRESSES_NEW (new_insn, -1);
13527 remove_insn (insn);
13531 if (cfun_frame_layout.first_restore_gpr == -1
13532 && GET_CODE (pat) == SET
13533 && GENERAL_REG_P (SET_DEST (pat))
13534 && GET_CODE (SET_SRC (pat)) == MEM)
13537 first = REGNO (SET_DEST (set));
13538 offset = const0_rtx;
13539 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13540 off = INTVAL (offset);
13542 if (GET_CODE (base) != REG || off < 0)
13545 if (REGNO (base) != STACK_POINTER_REGNUM
13546 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13549 remove_insn (insn);
13555 /* On z10 and later the dynamic branch prediction must see the
13556 backward jump within a certain window. If not, it falls back to
13557 the static prediction. This function rearranges the loop backward
13558 branch in a way which makes the static prediction always correct.
13559 The function returns true if it added an instruction. */
13561 s390_fix_long_loop_prediction (rtx_insn *insn)
13563 rtx set = single_set (insn);
13564 rtx code_label, label_ref;
13565 rtx_insn *uncond_jump;
13566 rtx_insn *cur_insn;
13570 /* This will exclude branch on count and branch on index patterns
13571 since these are correctly statically predicted. */
13573 || SET_DEST (set) != pc_rtx
13574 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13577 /* Skip conditional returns. */
13578 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13579 && XEXP (SET_SRC (set), 2) == pc_rtx)
13582 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13583 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13585 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13587 code_label = XEXP (label_ref, 0);
13589 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13590 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13591 || (INSN_ADDRESSES (INSN_UID (insn))
13592 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13595 for (distance = 0, cur_insn = PREV_INSN (insn);
13596 distance < PREDICT_DISTANCE - 6;
13597 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13598 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13601 rtx_code_label *new_label = gen_label_rtx ();
13602 uncond_jump = emit_jump_insn_after (
13603 gen_rtx_SET (pc_rtx,
13604 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13606 emit_label_after (new_label, uncond_jump);
13608 tmp = XEXP (SET_SRC (set), 1);
13609 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13610 XEXP (SET_SRC (set), 2) = tmp;
13611 INSN_CODE (insn) = -1;
13613 XEXP (label_ref, 0) = new_label;
13614 JUMP_LABEL (insn) = new_label;
13615 JUMP_LABEL (uncond_jump) = code_label;
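/* Editorial illustration (pseudo-assembly sketch, not from the original
   sources): the rewrite performed by s390_fix_long_loop_prediction.
   A backward conditional jump whose target is further away than
   PREDICT_DISTANCE

       target: ...
               ...
               jc   <cond>, target        # backward, window exceeded

   becomes a forward conditional jump over an unconditional backward
   jump; the static predictor assumes forward branches not taken and
   backward branches taken, so both are now predicted correctly:

       target: ...
               ...
               jc   <!cond>, new_label    # forward, predicted not taken
               j    target                # backward, predicted taken
       new_label:
               ...                                                     */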
13620 /* Returns 1 if INSN reads the value of REG for purposes not related
13621 to addressing of memory, and 0 otherwise. */
13623 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13625 return reg_referenced_p (reg, PATTERN (insn))
13626 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13629 /* Starting from INSN find_cond_jump looks downwards in the insn
13630 stream for a single jump insn which is the last user of the
13631 condition code set in INSN. */
13633 find_cond_jump (rtx_insn *insn)
13635 for (; insn; insn = NEXT_INSN (insn))
13639 if (LABEL_P (insn))
13642 if (!JUMP_P (insn))
13644 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13649 /* This will be triggered by a return. */
13650 if (GET_CODE (PATTERN (insn)) != SET)
13653 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13654 ite = SET_SRC (PATTERN (insn));
13656 if (GET_CODE (ite) != IF_THEN_ELSE)
13659 cc = XEXP (XEXP (ite, 0), 0);
13660 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13663 if (find_reg_note (insn, REG_DEAD, cc))
13671 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13672 the semantics does not change. If NULL_RTX is passed as COND the
13673 function tries to find the conditional jump starting with INSN. */
13675 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13679 if (cond == NULL_RTX)
13681 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13682 rtx set = jump ? single_set (jump) : NULL_RTX;
13684 if (set == NULL_RTX)
13687 cond = XEXP (SET_SRC (set), 0);
13692 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13695 /* On z10, instructions of the compare-and-branch family have the
13696 property of accessing the register occurring as the second operand with
13697 its bits complemented. If such a compare is grouped with a second
13698 instruction that accesses the same register non-complemented, and
13699 if that register's value is delivered via a bypass, then the
13700 pipeline recycles, thereby causing significant performance decline.
13701 This function locates such situations and exchanges the two
13702 operands of the compare.  The function returns true whenever it added an insn.  */
13705 s390_z10_optimize_cmp (rtx_insn *insn)
13707 rtx_insn *prev_insn, *next_insn;
13708 bool insn_added_p = false;
13709 rtx cond, *op0, *op1;
13711 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13713 /* Handle compare and branch and branch on count patterns.  */
13715 rtx pattern = single_set (insn);
13718 || SET_DEST (pattern) != pc_rtx
13719 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13722 cond = XEXP (SET_SRC (pattern), 0);
13723 op0 = &XEXP (cond, 0);
13724 op1 = &XEXP (cond, 1);
13726 else if (GET_CODE (PATTERN (insn)) == SET)
13730 /* Handle normal compare instructions. */
13731 src = SET_SRC (PATTERN (insn));
13732 dest = SET_DEST (PATTERN (insn));
13735 || !CC_REGNO_P (REGNO (dest))
13736 || GET_CODE (src) != COMPARE)
13739 /* s390_swap_cmp will try to find the conditional
13740 jump when passing NULL_RTX as condition. */
13742 op0 = &XEXP (src, 0);
13743 op1 = &XEXP (src, 1);
13748 if (!REG_P (*op0) || !REG_P (*op1))
13751 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13754 /* Swap the COMPARE arguments and its mask if there is a
13755 conflicting access in the previous insn. */
13756 prev_insn = prev_active_insn (insn);
13757 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13758 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13759 s390_swap_cmp (cond, op0, op1, insn);
13761 /* Check if there is a conflict with the next insn. If there
13762 was no conflict with the previous insn, then swap the
13763 COMPARE arguments and its mask. If we already swapped
13764 the operands, or if swapping them would cause a conflict
13765 with the previous insn, issue a NOP after the COMPARE in
13766 order to separate the two instructions.  */
13767 next_insn = next_active_insn (insn);
13768 if (next_insn != NULL_RTX && INSN_P (next_insn)
13769 && s390_non_addr_reg_read_p (*op1, next_insn))
13771 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13772 && s390_non_addr_reg_read_p (*op0, prev_insn))
13774 if (REGNO (*op1) == 0)
13775 emit_insn_after (gen_nop1 (), insn);
13777 emit_insn_after (gen_nop (), insn);
13778 insn_added_p = true;
13781 s390_swap_cmp (cond, op0, op1, insn);
13783 return insn_added_p;
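/* Editorial summary (assumed from the code above, not part of the
   original sources): for a compare "CC := *op0 <cond> *op1" on z10 the
   hazard applies to *op1, whose bits are accessed complemented.  If the
   previous insn reads the second operand, the operands (and the
   condition) are swapped first.  If, after that, the next insn reads
   the current second operand, the operands are swapped again, unless
   the previous insn reads the current first operand; in that case no
   swap can help, so a NOP is emitted to split the dispatch group.  */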
13786 /* Number of INSNs to be scanned backward in the last BB of the loop
13787 and forward in the first BB of the loop.  This usually should be a
13788 bit more than the number of INSNs which could go into one group.  */
13790 #define S390_OSC_SCAN_INSN_NUM 5
13792 /* Scan LOOP for static OSC collisions and return true if an osc_break
13793 should be issued for this loop. */
13795 s390_adjust_loop_scan_osc (struct loop* loop)
13798 HARD_REG_SET modregs, newregs;
13799 rtx_insn *insn, *store_insn = NULL;
13801 struct s390_address addr_store, addr_load;
13802 subrtx_iterator::array_type array;
13805 CLEAR_HARD_REG_SET (modregs);
13808 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13810 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13814 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13817 find_all_hard_reg_sets (insn, &newregs, true);
13818 IOR_HARD_REG_SET (modregs, newregs);
13820 set = single_set (insn);
13824 if (MEM_P (SET_DEST (set))
13825 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13832 if (store_insn == NULL_RTX)
13836 FOR_BB_INSNS (loop->header, insn)
13838 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13841 if (insn == store_insn)
13845 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13848 find_all_hard_reg_sets (insn, &newregs, true);
13849 IOR_HARD_REG_SET (modregs, newregs);
13851 set = single_set (insn);
13855 /* An intermediate store disrupts static OSC checking anyway.  */
13857 if (MEM_P (SET_DEST (set))
13858 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13861 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13863 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13864 && rtx_equal_p (addr_load.base, addr_store.base)
13865 && rtx_equal_p (addr_load.indx, addr_store.indx)
13866 && rtx_equal_p (addr_load.disp, addr_store.disp))
13868 if ((addr_load.base != NULL_RTX
13869 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
13870 || (addr_load.indx != NULL_RTX
13871 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
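/* Editorial example (assumed illustration, not from the original
   sources): the operand-store-compare pattern this scan looks for is a
   latch store followed, on the next iteration, by a header load from
   the very same address:

       loop:   l    %r1,0(%r2)        # header: load from 0(%r2)
               ...
               st   %r1,0(%r2)        # latch: store to 0(%r2)
               brct %r3,loop

   The two addresses compare equal via the base/index/disp checks
   above, and neither the base nor the index register may be in
   MODREGS, i.e. they must be unmodified between store and load.  */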
13878 /* Look for adjustments which can be done on simple innermost loops.  */
13881 s390_adjust_loops ()
13883 struct loop *loop = NULL;
13886 compute_bb_for_insn ();
13888 /* Find the loops. */
13889 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
13891 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
13895 flow_loop_dump (loop, dump_file, NULL, 0);
13896 fprintf (dump_file, ";; OSC loop scan Loop: ");
13898 if (loop->latch == NULL
13899 || pc_set (BB_END (loop->latch)) == NULL_RTX
13900 || !s390_adjust_loop_scan_osc (loop))
13904 if (loop->latch == NULL)
13905 fprintf (dump_file, " multiple backward jumps\n");
13908 fprintf (dump_file, " header insn: %d latch insn: %d ",
13909 INSN_UID (BB_HEAD (loop->header)),
13910 INSN_UID (BB_END (loop->latch)));
13911 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
13912 fprintf (dump_file, " loop does not end with jump\n");
13914 fprintf (dump_file, " not instrumented\n");
13920 rtx_insn *new_insn;
13923 fprintf (dump_file, " adding OSC break insn: ");
13924 new_insn = emit_insn_before (gen_osc_break (),
13925 BB_END (loop->latch));
13926 INSN_ADDRESSES_NEW (new_insn, -1);
13930 loop_optimizer_finalize ();
13932 df_finish_pass (false);
13935 /* Perform machine-dependent processing. */
13940 bool pool_overflow = false;
13941 int hw_before, hw_after;
13943 if (s390_tune == PROCESSOR_2964_Z13)
13944 s390_adjust_loops ();
13946 /* Make sure all splits have been performed; splits after
13947 machine_dependent_reorg might confuse insn length counts. */
13948 split_all_insns_noflow ();
13950 /* Install the main literal pool and the associated base
13951 register load insns.
13953 In addition, there are two problematic situations we need to correct:
13956 - the literal pool might be > 4096 bytes in size, so that
13957 some of its elements cannot be directly accessed
13959 - a branch target might be > 64K away from the branch, so that
13960 it is not possible to use a PC-relative instruction.
13962 To fix those, we split the single literal pool into multiple
13963 pool chunks, reloading the pool base register at various
13964 points throughout the function to ensure it always points to
13965 the pool chunk the following code expects, and / or replace
13966 PC-relative branches by absolute branches.
13968 However, the two problems are interdependent: splitting the
13969 literal pool can move a branch further away from its target,
13970 causing the 64K limit to overflow, and on the other hand,
13971 replacing a PC-relative branch by an absolute branch means
13972 we need to put the branch target address into the literal
13973 pool, possibly causing it to overflow.
13975 So, we loop trying to fix up both problems until we manage
13976 to satisfy both conditions at the same time. Note that the
13977 loop is guaranteed to terminate as every pass of the loop
13978 strictly decreases the total number of PC-relative branches
13979 in the function. (This is not completely true as there
13980 might be branch-over-pool insns introduced by chunkify_start.
13981 Those never need to be split however.) */
13985 struct constant_pool *pool = NULL;
13987 /* Collect the literal pool. */
13988 if (!pool_overflow)
13990 pool = s390_mainpool_start ();
13992 pool_overflow = true;
13995 /* If literal pool overflowed, start to chunkify it. */
13997 pool = s390_chunkify_start ();
13999 /* Split out-of-range branches. If this has created new
14000 literal pool entries, cancel current chunk list and
14001 recompute it. zSeries machines have large branch
14002 instructions, so we never need to split a branch. */
14003 if (!TARGET_CPU_ZARCH && s390_split_branches ())
14006 s390_chunkify_cancel (pool);
14008 s390_mainpool_cancel (pool);
14013 /* If we made it up to here, both conditions are satisfied.
14014 Finish up literal pool related changes. */
14016 s390_chunkify_finish (pool);
14018 s390_mainpool_finish (pool);
14020 /* We're done splitting branches. */
14021 cfun->machine->split_branches_pending_p = false;
14025 /* Generate out-of-pool execute target insns. */
14026 if (TARGET_CPU_ZARCH)
14028 rtx_insn *insn, *target;
14031 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14033 label = s390_execute_label (insn);
14037 gcc_assert (label != const0_rtx);
14039 target = emit_label (XEXP (label, 0));
14040 INSN_ADDRESSES_NEW (target, -1);
14042 target = emit_insn (s390_execute_target (insn));
14043 INSN_ADDRESSES_NEW (target, -1);
14047 /* Try to optimize prologue and epilogue further. */
14048 s390_optimize_prologue ();
14050 /* Walk over the insns and do some >=z10 specific changes. */
14051 if (s390_tune >= PROCESSOR_2097_Z10)
14054 bool insn_added_p = false;
14056 /* The insn lengths and addresses have to be up to date for the
14057 following manipulations. */
14058 shorten_branches (get_insns ());
14060 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14062 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14066 insn_added_p |= s390_fix_long_loop_prediction (insn);
14068 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14069 || GET_CODE (PATTERN (insn)) == SET)
14070 && s390_tune == PROCESSOR_2097_Z10)
14071 insn_added_p |= s390_z10_optimize_cmp (insn);
14074 /* Adjust branches if we added new instructions. */
14076 shorten_branches (get_insns ());
14079 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14084 /* Insert NOPs for hotpatching. */
14085 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14087 /* Emit NOPs: 1. inside the area covered by debug information to allow setting
14088 breakpoints at the NOPs,
14089 2. before any insn which results in an asm instruction,
14090 3. before in-function labels to avoid jumping to the NOPs, for
14091 example as part of a loop,
14092 4. before any barrier in case the function is completely empty
14093 (__builtin_unreachable ()) and has neither internal labels nor active insns.  */
14096 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14098 /* Output a series of NOPs before the first active insn. */
14099 while (insn && hw_after > 0)
14101 if (hw_after >= 3 && TARGET_CPU_ZARCH)
14103 emit_insn_before (gen_nop_6_byte (), insn);
14106 else if (hw_after >= 2)
14108 emit_insn_before (gen_nop_4_byte (), insn);
14113 emit_insn_before (gen_nop_2_byte (), insn);
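/* Editorial model (hypothetical helper, not part of GCC) of the NOP
   decomposition above: HW_AFTER counts halfwords (2 bytes each) and is
   consumed greedily by 6-, 4- and 2-byte NOPs.  The 6-byte case
   additionally requires TARGET_CPU_ZARCH, which this sketch assumes.  */
static void
model_hotpatch_nops (int hw_after, int *n6, int *n4, int *n2)
{
  *n6 = *n4 = *n2 = 0;
  while (hw_after > 0)
    {
      if (hw_after >= 3)
	{ (*n6)++; hw_after -= 3; }	/* 6-byte NOP == 3 halfwords */
      else if (hw_after >= 2)
	{ (*n4)++; hw_after -= 2; }	/* 4-byte NOP == 2 halfwords */
      else
	{ (*n2)++; hw_after -= 1; }	/* 2-byte NOP == 1 halfword  */
    }
}
/* E.g. hw_after == 5 yields one 6-byte plus one 4-byte NOP.  */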
14120 /* Return true if INSN is a fp load insn writing register REGNO. */
14122 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14125 enum attr_type flag = s390_safe_attr_type (insn);
14127 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14130 set = single_set (insn);
14132 if (set == NULL_RTX)
14135 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14138 if (REGNO (SET_DEST (set)) != regno)
14144 /* This value describes the distance to be avoided between an
14145 arithmetic fp instruction and an fp load writing the same register.
14146 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14147 fine, but the exact value has to be avoided.  Otherwise the FP
14148 pipeline will throw an exception causing a major penalty. */
14149 #define Z10_EARLYLOAD_DISTANCE 7
14151 /* Rearrange the ready list in order to avoid the situation described
14152 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14153 moved to the very end of the ready list. */
14155 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14157 unsigned int regno;
14158 int nready = *nready_p;
14163 enum attr_type flag;
14166 /* Skip DISTANCE - 1 active insns. */
14167 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14168 distance > 0 && insn != NULL_RTX;
14169 distance--, insn = prev_active_insn (insn))
14170 if (CALL_P (insn) || JUMP_P (insn))
14173 if (insn == NULL_RTX)
14176 set = single_set (insn);
14178 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14179 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14182 flag = s390_safe_attr_type (insn);
14184 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14187 regno = REGNO (SET_DEST (set));
14190 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14197 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
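/* Editorial worked example (not from the original sources): GCC issues
   the ready list from the highest index down, so index 0 is issued
   last.  With nready == 4 and the problematic load found at i == 2,
   the memmove above shifts ready[0..1] up by one and the (elided)
   re-insertion puts the load at the very end of the list:

       before:  { r0, r1, LOAD, r3 }      # r3 issued first
       after:   { LOAD, r0, r1, r3 }      # LOAD issued last          */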
14202 /* The s390_sched_state variable tracks the state of the current or
14203 the last instruction group.
14205 0,1,2 number of instructions scheduled in the current group
14206 3 the last group is complete - normal insns
14207 4 the last group was a cracked/expanded insn */
14209 static int s390_sched_state;
14211 #define S390_SCHED_STATE_NORMAL 3
14212 #define S390_SCHED_STATE_CRACKED 4
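/* Editorial sketch (assumed from s390_sched_variable_issue below) of
   the transitions on s390_sched_state after issuing an insn:

     cracked or expanded insn     ->  CRACKED
     endgroup or groupalone insn  ->  NORMAL   (group is closed)
     normal insn:  0 -> 1 -> 2 -> NORMAL      (group fills up)
                   NORMAL -> 1                (a new group starts)
                   CRACKED -> NORMAL          (cracked group closed)  */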
14214 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14215 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14216 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14217 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14219 static unsigned int
14220 s390_get_sched_attrmask (rtx_insn *insn)
14222 unsigned int mask = 0;
14226 case PROCESSOR_2827_ZEC12:
14227 if (get_attr_zEC12_cracked (insn))
14228 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14229 if (get_attr_zEC12_expanded (insn))
14230 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14231 if (get_attr_zEC12_endgroup (insn))
14232 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14233 if (get_attr_zEC12_groupalone (insn))
14234 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14236 case PROCESSOR_2964_Z13:
14237 case PROCESSOR_ARCH12:
14238 if (get_attr_z13_cracked (insn))
14239 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14240 if (get_attr_z13_expanded (insn))
14241 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14242 if (get_attr_z13_endgroup (insn))
14243 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14244 if (get_attr_z13_groupalone (insn))
14245 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14248 gcc_unreachable ();
14253 static unsigned int
14254 s390_get_unit_mask (rtx_insn *insn, int *units)
14256 unsigned int mask = 0;
14260 case PROCESSOR_2964_Z13:
14261 case PROCESSOR_ARCH12:
14263 if (get_attr_z13_unit_lsu (insn))
14265 if (get_attr_z13_unit_fxu (insn))
14267 if (get_attr_z13_unit_vfu (insn))
14271 gcc_unreachable ();
14276 /* Return the scheduling score for INSN. The higher the score the
14277 better. The score is calculated from the OOO scheduling attributes
14278 of INSN and the scheduling state s390_sched_state. */
14280 s390_sched_score (rtx_insn *insn)
14282 unsigned int mask = s390_get_sched_attrmask (insn);
14285 switch (s390_sched_state)
14288 /* Try to put insns into the first slot which would otherwise break a group.  */
14290 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14291 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14293 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14297 /* Prefer not cracked insns while trying to put together a group.  */
14299 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14300 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14301 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14303 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14307 /* Prefer not cracked insns while trying to put together a group.  */
14309 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14310 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14311 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14313 /* Prefer endgroup insns in the last slot. */
14314 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14317 case S390_SCHED_STATE_NORMAL:
14318 /* Prefer not cracked insns if the last was not cracked. */
14319 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14320 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14322 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14325 case S390_SCHED_STATE_CRACKED:
14326 /* Try to keep cracked insns together to prevent them from
14327 interrupting groups. */
14328 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14329 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14334 if (s390_tune >= PROCESSOR_2964_Z13)
14337 unsigned unit_mask, m = 1;
14339 unit_mask = s390_get_unit_mask (insn, &units);
14340 gcc_assert (units <= MAX_SCHED_UNITS);
14342 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14343 ago the last insn of this unit type got scheduled. This is
14344 supposed to help provide a proper instruction mix to the hardware.  */
14346 for (i = 0; i < units; i++, m <<= 1)
14348 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14349 MAX_SCHED_MIX_DISTANCE);
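/* Editorial worked example (the MAX_SCHED_MIX_* constants are defined
   elsewhere in this file; the values here are assumed purely for
   illustration): with MAX_SCHED_MIX_SCORE == 8 and
   MAX_SCHED_MIX_DISTANCE == 100, a unit whose last use was 50 insns
   ago contributes 50 * 8 / 100 == 4 points, so long-idle units attract
   work and the instruction mix evens out.  */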
14354 /* This function is called via hook TARGET_SCHED_REORDER before
14355 issuing one insn from list READY which contains *NREADYP entries.
14356 For target z10 it reorders load instructions to avoid early load
14357 conflicts in the floating point pipeline.  */
14359 s390_sched_reorder (FILE *file, int verbose,
14360 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14362 if (s390_tune == PROCESSOR_2097_Z10
14363 && reload_completed
14365 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14367 if (s390_tune >= PROCESSOR_2827_ZEC12
14368 && reload_completed
14372 int last_index = *nreadyp - 1;
14373 int max_index = -1;
14374 int max_score = -1;
14377 /* Just move the insn with the highest score to the top (the
14378 end) of the list. A full sort is not needed since a conflict
14379 in the hazard recognition cannot happen. So the top insn in
14380 the ready list will always be taken. */
14381 for (i = last_index; i >= 0; i--)
14385 if (recog_memoized (ready[i]) < 0)
14388 score = s390_sched_score (ready[i]);
14389 if (score > max_score)
14396 if (max_index != -1)
14398 if (max_index != last_index)
14400 tmp = ready[max_index];
14401 ready[max_index] = ready[last_index];
14402 ready[last_index] = tmp;
14406 ";;\t\tBACKEND: move insn %d to the top of list\n",
14407 INSN_UID (ready[last_index]));
14409 else if (verbose > 5)
14411 ";;\t\tBACKEND: best insn %d already on top\n",
14412 INSN_UID (ready[last_index]));
14417 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14420 for (i = last_index; i >= 0; i--)
14422 unsigned int sched_mask;
14423 rtx_insn *insn = ready[i];
14425 if (recog_memoized (insn) < 0)
14428 sched_mask = s390_get_sched_attrmask (insn);
14429 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14431 s390_sched_score (insn));
14432 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14433 ((M) & sched_mask) ? #ATTR : "");
14434 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14435 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14436 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14437 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14438 #undef PRINT_SCHED_ATTR
14439 if (s390_tune >= PROCESSOR_2964_Z13)
14441 unsigned int unit_mask, m = 1;
14444 unit_mask = s390_get_unit_mask (insn, &units);
14445 fprintf (file, "(units:");
14446 for (j = 0; j < units; j++, m <<= 1)
14448 fprintf (file, " u%d", j);
14449 fprintf (file, ")");
14451 fprintf (file, "\n");
14456 return s390_issue_rate ();
14460 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14461 the scheduler has issued INSN. It stores the last issued insn into
14462 last_scheduled_insn in order to make it available for
14463 s390_sched_reorder. */
14465 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14467 last_scheduled_insn = insn;
14469 if (s390_tune >= PROCESSOR_2827_ZEC12
14470 && reload_completed
14471 && recog_memoized (insn) >= 0)
14473 unsigned int mask = s390_get_sched_attrmask (insn);
14475 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14476 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14477 s390_sched_state = S390_SCHED_STATE_CRACKED;
14478 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14479 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14480 s390_sched_state = S390_SCHED_STATE_NORMAL;
14483 /* Only normal insns are left (mask == 0). */
14484 switch (s390_sched_state)
14489 case S390_SCHED_STATE_NORMAL:
14490 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14491 s390_sched_state = 1;
14493 s390_sched_state++;
14496 case S390_SCHED_STATE_CRACKED:
14497 s390_sched_state = S390_SCHED_STATE_NORMAL;
14502 if (s390_tune >= PROCESSOR_2964_Z13)
14505 unsigned unit_mask, m = 1;
14507 unit_mask = s390_get_unit_mask (insn, &units);
14508 gcc_assert (units <= MAX_SCHED_UNITS);
14510 for (i = 0; i < units; i++, m <<= 1)
14512 last_scheduled_unit_distance[i] = 0;
14513 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14514 last_scheduled_unit_distance[i]++;
14519 unsigned int sched_mask;
14521 sched_mask = s390_get_sched_attrmask (insn);
14523 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14524 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14525 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14526 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14527 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14528 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14529 #undef PRINT_SCHED_ATTR
14531 if (s390_tune >= PROCESSOR_2964_Z13)
14533 unsigned int unit_mask, m = 1;
14536 unit_mask = s390_get_unit_mask (insn, &units);
14537 fprintf (file, "(units:");
14538 for (j = 0; j < units; j++, m <<= 1)
14540 fprintf (file, " %d", j);
14541 fprintf (file, ")");
14543 fprintf (file, " sched state: %d\n", s390_sched_state);
14545 if (s390_tune >= PROCESSOR_2964_Z13)
14549 s390_get_unit_mask (insn, &units);
14551 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14552 for (j = 0; j < units; j++)
14553 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14554 fprintf (file, "\n");
14559 if (GET_CODE (PATTERN (insn)) != USE
14560 && GET_CODE (PATTERN (insn)) != CLOBBER)
14567 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14568 int verbose ATTRIBUTE_UNUSED,
14569 int max_ready ATTRIBUTE_UNUSED)
14571 last_scheduled_insn = NULL;
14572 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14573 s390_sched_state = 0;
14576 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14577 the number of times struct loop *loop should be unrolled when tuning
14578 for CPUs with a built-in stride prefetcher.
14579 The loop is analyzed for memory accesses by walking all rtxes of the
14580 insns in the loop body.  Depending on the loop depth and the number
14581 of memory accesses, a new count <= nunroll is returned to improve
14582 the behavior of the hardware prefetch unit.  */
14584 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14589 unsigned mem_count = 0;
14591 if (s390_tune < PROCESSOR_2097_Z10)
14594 /* Count the number of memory references within the loop body. */
14595 bbs = get_loop_body (loop);
14596 subrtx_iterator::array_type array;
14597 for (i = 0; i < loop->num_nodes; i++)
14598 FOR_BB_INSNS (bbs[i], insn)
14599 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14600 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14605 /* Prevent division by zero; nunroll needs no adjustment in this case.  */
14606 if (mem_count == 0)
14609 switch (loop_depth (loop))
14612 return MIN (nunroll, 28 / mem_count);
14614 return MIN (nunroll, 22 / mem_count);
14616 return MIN (nunroll, 16 / mem_count);
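/* Editorial model (hypothetical helper, not part of GCC) of the
   clamping above, assuming the elided case labels map depth 1 to the
   28 budget, depth 2 to 22 and anything deeper to 16.  */
static unsigned
model_unroll_adjust (unsigned nunroll, unsigned depth, unsigned mem_count)
{
  unsigned budget = (depth == 1) ? 28 : (depth == 2) ? 22 : 16;
  if (mem_count == 0)			/* nothing to clamp against */
    return nunroll;
  return MIN (nunroll, budget / mem_count);
}
/* E.g. model_unroll_adjust (8, 1, 4) == 7 and (8, 2, 11) == 2.  */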
14620 /* Restore the current options.  This is a hook function and also called internally.  */
14624 s390_function_specific_restore (struct gcc_options *opts,
14625 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14627 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14631 s390_option_override_internal (bool main_args_p,
14632 struct gcc_options *opts,
14633 const struct gcc_options *opts_set)
14635 const char *prefix;
14636 const char *suffix;
14638 /* Set up prefix/suffix so the error messages refer to either the command
14639 line argument, or the attribute(target). */
14647 prefix = "option(\"";
14652 /* Architecture mode defaults according to ABI. */
14653 if (!(opts_set->x_target_flags & MASK_ZARCH))
14656 opts->x_target_flags |= MASK_ZARCH;
14658 opts->x_target_flags &= ~MASK_ZARCH;
14661 /* Set the march default in case it hasn't been specified on cmdline. */
14662 if (!opts_set->x_s390_arch)
14663 opts->x_s390_arch = PROCESSOR_2064_Z900;
14664 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14665 || opts->x_s390_arch == PROCESSOR_9672_G6)
14666 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14667 "in future releases; use at least %sarch=z900%s",
14668 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14669 suffix, prefix, suffix);
14671 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14673 /* Determine processor to tune for. */
14674 if (!opts_set->x_s390_tune)
14675 opts->x_s390_tune = opts->x_s390_arch;
14676 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14677 || opts->x_s390_tune == PROCESSOR_9672_G6)
14678 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14679 "in future releases; use at least %stune=z900%s",
14680 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14681 suffix, prefix, suffix);
14683 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14685 /* Sanity checks. */
14686 if (opts->x_s390_arch == PROCESSOR_NATIVE
14687 || opts->x_s390_tune == PROCESSOR_NATIVE)
14688 gcc_unreachable ();
14689 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14690 error ("z/Architecture mode not supported on %s",
14691 processor_table[(int)opts->x_s390_arch].name);
14692 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14693 error ("64-bit ABI not supported in ESA/390 mode");
14695 /* Enable hardware transactions if available and not explicitly
14696 disabled by the user.  E.g. with -m31 -march=zEC12 -mzarch.  */
14697 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14699 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14700 opts->x_target_flags |= MASK_OPT_HTM;
14702 opts->x_target_flags &= ~MASK_OPT_HTM;
14705 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14707 if (TARGET_OPT_VX_P (opts->x_target_flags))
14709 if (!TARGET_CPU_VX_P (opts))
14710 error ("hardware vector support not available on %s",
14711 processor_table[(int)opts->x_s390_arch].name);
14712 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14713 error ("hardware vector support not available with -msoft-float");
14718 if (TARGET_CPU_VX_P (opts))
14719 /* Enable vector support if available and not explicitly disabled
14720 by the user.  E.g. with -m31 -march=z13 -mzarch.  */
14721 opts->x_target_flags |= MASK_OPT_VX;
14723 opts->x_target_flags &= ~MASK_OPT_VX;
14726 /* Use hardware DFP if available and not explicitly disabled by the
14727 user.  E.g. with -m31 -march=z10 -mzarch.  */
14728 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14730 if (TARGET_DFP_P (opts))
14731 opts->x_target_flags |= MASK_HARD_DFP;
14733 opts->x_target_flags &= ~MASK_HARD_DFP;
14736 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14738 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14740 if (!TARGET_CPU_DFP_P (opts))
14741 error ("hardware decimal floating point instructions"
14742 " not available on %s",
14743 processor_table[(int)opts->x_s390_arch].name);
14744 if (!TARGET_ZARCH_P (opts->x_target_flags))
14745 error ("hardware decimal floating point instructions"
14746 " not available in ESA/390 mode");
14749 opts->x_target_flags &= ~MASK_HARD_DFP;
14752 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14753 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14755 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14756 && TARGET_HARD_DFP_P (opts->x_target_flags))
14757 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14759 opts->x_target_flags &= ~MASK_HARD_DFP;
14762 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14763 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14764 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14765 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14768 if (opts->x_s390_stack_size)
14770 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14771 error ("stack size must be greater than the stack guard value");
14772 else if (opts->x_s390_stack_size > 1 << 16)
14773 error ("stack size must not be greater than 64k");
14775 else if (opts->x_s390_stack_guard)
14776 error ("-mstack-guard implies use of -mstack-size");
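/* Editorial example (assumed, not from the original sources):
   -mstack-size=65536 -mstack-guard=4096 passes the checks above, while
   -mstack-guard=4096 alone, a guard >= the size, or a size beyond 64k
   are all rejected.  */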
14778 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14779 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14780 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14783 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14785 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14786 opts->x_param_values,
14787 opts_set->x_param_values);
14788 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14789 opts->x_param_values,
14790 opts_set->x_param_values);
14791 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14792 opts->x_param_values,
14793 opts_set->x_param_values);
14794 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14795 opts->x_param_values,
14796 opts_set->x_param_values);
14799 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14800 opts->x_param_values,
14801 opts_set->x_param_values);
14802 /* Values for loop prefetching.  */
14803 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14804 opts->x_param_values,
14805 opts_set->x_param_values);
14806 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14807 opts->x_param_values,
14808 opts_set->x_param_values);
14809 /* s390 has more than 2 cache levels and their sizes are much larger.
14810 Since we are always running virtualized, assume that we only get a
14811 small part of the caches above L1.  */
14812 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14813 opts->x_param_values,
14814 opts_set->x_param_values);
14815 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14816 opts->x_param_values,
14817 opts_set->x_param_values);
14818 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14819 opts->x_param_values,
14820 opts_set->x_param_values);
14822 /* Use the alternative scheduling-pressure algorithm by default. */
14823 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14824 opts->x_param_values,
14825 opts_set->x_param_values);
14827 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
14828 opts->x_param_values,
14829 opts_set->x_param_values);
14831 /* Call target specific restore function to do post-init work. At the moment,
14832 this just sets opts->x_s390_cost_pointer. */
14833 s390_function_specific_restore (opts, NULL);
14837 s390_option_override (void)
14840 cl_deferred_option *opt;
14841 vec<cl_deferred_option> *v =
14842 (vec<cl_deferred_option> *) s390_deferred_options;
14845 FOR_EACH_VEC_ELT (*v, i, opt)
14847 switch (opt->opt_index)
14849 case OPT_mhotpatch_:
14856 strncpy (s, opt->arg, 256);
14858 t = strchr (s, ',');
14863 val1 = integral_argument (s);
14864 val2 = integral_argument (t);
14871 if (val1 == -1 || val2 == -1)
14873 /* argument is not a plain number */
14874 error ("arguments to %qs should be non-negative integers",
14878 else if (val1 > s390_hotpatch_hw_max
14879 || val2 > s390_hotpatch_hw_max)
14881 error ("argument to %qs is too large (max. %d)",
14882 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14885 s390_hotpatch_hw_before_label = val1;
14886 s390_hotpatch_hw_after_label = val2;
14890 gcc_unreachable ();
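/* Editorial example (assumed, not from the original sources): with
   -mhotpatch=1,2 the parser above yields val1 == 1 and val2 == 2,
   i.e. one halfword of NOPs before the function label and two after
   it; non-numeric values or values above s390_hotpatch_hw_max are
   rejected with the errors above.  */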
14894 /* Set up function hooks. */
14895 init_machine_status = s390_init_machine_status;
14897 s390_option_override_internal (true, &global_options, &global_options_set);
14899 /* Save the initial options in case the user applies function-specific options later.  */
14901 target_option_default_node = build_target_option_node (&global_options);
14902 target_option_current_node = target_option_default_node;
14904 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14905 requires the arch flags to be evaluated already. Since prefetching
14906 is beneficial on s390, we enable it if available. */
14907 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14908 flag_prefetch_loop_arrays = 1;
14912 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14913 debuggers do not yet support DWARF 3/4. */
14914 if (!global_options_set.x_dwarf_strict)
14916 if (!global_options_set.x_dwarf_version)
14920 /* Register a target-specific optimization-and-lowering pass
14921 to run immediately before prologue and epilogue generation.
14923 Registering the pass must be done at start up. It's
14924 convenient to do it here. */
14925 opt_pass *new_pass = new pass_s390_early_mach (g);
14926 struct register_pass_info insert_pass_s390_early_mach =
14928 new_pass, /* pass */
14929 "pro_and_epilogue", /* reference_pass_name */
14930 1, /* ref_pass_instance_number */
14931 PASS_POS_INSERT_BEFORE /* po_op */
14933 register_pass (&insert_pass_s390_early_mach);
14936 #if S390_USE_TARGET_ATTRIBUTE
14937 /* Inner function to process the attribute((target(...))), take an argument and
14938 set the current options from the argument.  If we have a list, recursively go over the list.  */
14942 s390_valid_target_attribute_inner_p (tree args,
14943 struct gcc_options *opts,
14944 struct gcc_options *new_opts_set,
14950 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14951 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14952 static const struct
14954 const char *string;
14958 int only_as_pragma;
14961 S390_ATTRIB ("arch=", OPT_march_, 1),
14962 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14963 /* uinteger options */
14964 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14965 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14966 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14967 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14969 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14970 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14971 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14972 S390_ATTRIB ("htm", OPT_mhtm, 0),
14973 S390_ATTRIB ("vx", OPT_mvx, 0),
14974 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14975 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14976 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14977 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14978 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14979 /* boolean options */
14980 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14985 /* If this is a list, recurse to get the options. */
14986 if (TREE_CODE (args) == TREE_LIST)
14989 int num_pragma_values;
14992 /* Note: attribs.c:decl_attributes prepends the values from
14993 current_target_pragma to the list of target attributes. To determine
14994 whether we're looking at a value of the attribute or the pragma we
14995 assume that the first [list_length (current_target_pragma)] values in
14996 the list are the values from the pragma. */
14997 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14998 ? list_length (current_target_pragma) : 0;
14999 for (i = 0; args; args = TREE_CHAIN (args), i++)
15003 is_pragma = (force_pragma || i < num_pragma_values);
15004 if (TREE_VALUE (args)
15005 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15006 opts, new_opts_set,
15015 else if (TREE_CODE (args) != STRING_CST)
15017 error ("attribute %<target%> argument not a string");
15021 /* Handle multiple arguments separated by commas. */
15022 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15024 while (next_optstr && *next_optstr != '\0')
15026 char *p = next_optstr;
15028 char *comma = strchr (next_optstr, ',');
15029 size_t len, opt_len;
15035 enum cl_var_type var_type;
15041 len = comma - next_optstr;
15042 next_optstr = comma + 1;
15047 next_optstr = NULL;
15050 /* Recognize no-xxx. */
15051 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15060 /* Find the option. */
15063 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15065 opt_len = attrs[i].len;
15066 if (ch == attrs[i].string[0]
15067 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15068 && memcmp (p, attrs[i].string, opt_len) == 0)
15070 opt = attrs[i].opt;
15071 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15073 mask = cl_options[opt].var_value;
15074 var_type = cl_options[opt].var_type;
15080 /* Process the option. */
15083 error ("attribute(target(\"%s\")) is unknown", orig_p);
15086 else if (attrs[i].only_as_pragma && !force_pragma)
15088 /* Value is not allowed for the target attribute. */
15089 error ("value %qs is not supported by attribute %<target%>",
15094 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15096 if (var_type == CLVC_BIT_CLEAR)
15097 opt_set_p = !opt_set_p;
15100 opts->x_target_flags |= mask;
15102 opts->x_target_flags &= ~mask;
15103 new_opts_set->x_target_flags |= mask;
15106 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15110 if (cl_options[opt].cl_uinteger)
15112 /* Unsigned integer argument. Code based on the function
15113 decode_cmdline_option () in opts-common.c. */
15114 value = integral_argument (p + opt_len);
15117 value = (opt_set_p) ? 1 : 0;
15121 struct cl_decoded_option decoded;
15123 /* Value range check; only implemented for numeric and boolean
15124 options at the moment. */
15125 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15126 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15127 set_option (opts, new_opts_set, opt, value,
15128 p + opt_len, DK_UNSPECIFIED, input_location,
15133 error ("attribute(target(\"%s\")) is unknown", orig_p);
15138 else if (cl_options[opt].var_type == CLVC_ENUM)
15143 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15145 set_option (opts, new_opts_set, opt, value,
15146 p + opt_len, DK_UNSPECIFIED, input_location,
15150 error ("attribute(target(\"%s\")) is unknown", orig_p);
15156 gcc_unreachable ();
15161 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15164 s390_valid_target_attribute_tree (tree args,
15165 struct gcc_options *opts,
15166 const struct gcc_options *opts_set,
15169 tree t = NULL_TREE;
15170 struct gcc_options new_opts_set;
15172 memset (&new_opts_set, 0, sizeof (new_opts_set));
15174 /* Process each of the options on the chain. */
15175 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15177 return error_mark_node;
15179 /* If some option was set (even if it has not changed), rerun
15180 s390_option_override_internal, and then save the options away. */
15181 if (new_opts_set.x_target_flags
15182 || new_opts_set.x_s390_arch
15183 || new_opts_set.x_s390_tune
15184 || new_opts_set.x_s390_stack_guard
15185 || new_opts_set.x_s390_stack_size
15186 || new_opts_set.x_s390_branch_cost
15187 || new_opts_set.x_s390_warn_framesize
15188 || new_opts_set.x_s390_warn_dynamicstack_p)
15190 const unsigned char *src = (const unsigned char *)opts_set;
15191 unsigned char *dest = (unsigned char *)&new_opts_set;
15194 /* Merge the original option flags into the new ones. */
15195 for (i = 0; i < sizeof(*opts_set); i++)
15198 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15199 s390_option_override_internal (false, opts, &new_opts_set);
15200 /* Save the current options unless we are validating options for #pragma.  */
15202 t = build_target_option_node (opts);
15207 /* Hook to validate attribute((target("string"))). */
15210 s390_valid_target_attribute_p (tree fndecl,
15211 tree ARG_UNUSED (name),
15213 int ARG_UNUSED (flags))
15215 struct gcc_options func_options;
15216 tree new_target, new_optimize;
15219 /* attribute((target("default"))) does nothing, beyond
15220 affecting multi-versioning. */
15221 if (TREE_VALUE (args)
15222 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15223 && TREE_CHAIN (args) == NULL_TREE
15224 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15227 tree old_optimize = build_optimization_node (&global_options);
15229 /* Get the optimization options of the current function. */
15230 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15232 if (!func_optimize)
15233 func_optimize = old_optimize;
15235 /* Init func_options. */
15236 memset (&func_options, 0, sizeof (func_options));
15237 init_options_struct (&func_options, NULL);
15238 lang_hooks.init_options_struct (&func_options);
15240 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15242 /* Initialize func_options to the default before its target options can be set.  */
15244 cl_target_option_restore (&func_options,
15245 TREE_TARGET_OPTION (target_option_default_node));
15247 new_target = s390_valid_target_attribute_tree (args, &func_options,
15248 &global_options_set,
15250 current_target_pragma));
15251 new_optimize = build_optimization_node (&func_options);
15252 if (new_target == error_mark_node)
15254 else if (fndecl && new_target)
15256 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15257 if (old_optimize != new_optimize)
15258 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15263 /* Restore targets globals from NEW_TREE and invalidate the s390_previous_fndecl cache.  */
15267 s390_activate_target_options (tree new_tree)
15269 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15270 if (TREE_TARGET_GLOBALS (new_tree))
15271 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15272 else if (new_tree == target_option_default_node)
15273 restore_target_globals (&default_target_globals);
15275 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15276 s390_previous_fndecl = NULL_TREE;
15279 /* Establish appropriate back-end context for processing the function
15280 FNDECL. The argument might be NULL to indicate processing at top
15281 level, outside of any function scope. */
15283 s390_set_current_function (tree fndecl)
15285 /* Only change the context if the function changes. This hook is called
15286 several times in the course of compiling a function, and we don't want to
15287 slow things down too much or call target_reinit when it isn't safe. */
15288 if (fndecl == s390_previous_fndecl)
15292 if (s390_previous_fndecl == NULL_TREE)
15293 old_tree = target_option_current_node;
15294 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15295 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15297 old_tree = target_option_default_node;
15299 if (fndecl == NULL_TREE)
15301 if (old_tree != target_option_current_node)
15302 s390_activate_target_options (target_option_current_node);
15306 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15307 if (new_tree == NULL_TREE)
15308 new_tree = target_option_default_node;
15310 if (old_tree != new_tree)
15311 s390_activate_target_options (new_tree);
15312 s390_previous_fndecl = fndecl;
15316 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15319 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15320 unsigned int align ATTRIBUTE_UNUSED,
15321 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15322 bool speed_p ATTRIBUTE_UNUSED)
15324 return (size == 1 || size == 2
15325 || size == 4 || (TARGET_ZARCH && size == 8));
15328 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15331 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15333 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15334 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15335 tree call_efpc = build_call_expr (efpc, 0);
15336 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15338 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15339 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15340 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15341 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15342 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15343 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15345 /* Generates the equivalent of feholdexcept (&fenv_var)
15347 fenv_var = __builtin_s390_efpc ();
15348 __builtin_s390_sfpc (fenv_var & mask) */
15349 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15351 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15352 build_int_cst (unsigned_type_node,
15353 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15354 FPC_EXCEPTION_MASK)));
15355 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15356 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15358 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15360 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15361 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15362 build_int_cst (unsigned_type_node,
15363 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15364 *clear = build_call_expr (sfpc, 1, new_fpc);
15366 /* Generates the equivalent of feupdateenv (fenv_var)
15368 old_fpc = __builtin_s390_efpc ();
15369 __builtin_s390_sfpc (fenv_var);
15370 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15372 old_fpc = create_tmp_var_raw (unsigned_type_node);
15373 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15374 old_fpc, call_efpc);
15376 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15378 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15379 build_int_cst (unsigned_type_node,
15381 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15382 build_int_cst (unsigned_type_node,
15384 tree atomic_feraiseexcept
15385 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15386 raise_old_except = build_call_expr (atomic_feraiseexcept,
15387 1, raise_old_except);
15389 *update = build2 (COMPOUND_EXPR, void_type_node,
15390 build2 (COMPOUND_EXPR, void_type_node,
15391 store_old_fpc, set_new_fpc),
15394 #undef FPC_EXCEPTION_MASK
15395 #undef FPC_FLAGS_MASK
15396 #undef FPC_DXC_MASK
15397 #undef FPC_EXCEPTION_MASK_SHIFT
15398 #undef FPC_FLAGS_SHIFT
15399 #undef FPC_DXC_SHIFT
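/* Editorial recap (assembled from the comments above, not from the
   original sources): the three sequences built by this function are
   roughly equivalent to

     hold:    fenv_var = __builtin_s390_efpc ();
              __builtin_s390_sfpc (fenv_var & ~(FPC_DXC_MASK
                                                | FPC_FLAGS_MASK
                                                | FPC_EXCEPTION_MASK));
     clear:   __builtin_s390_sfpc (__builtin_s390_efpc ()
                                   & ~(FPC_DXC_MASK | FPC_FLAGS_MASK));
     update:  old_fpc = __builtin_s390_efpc ();
              __builtin_s390_sfpc (fenv_var);
              __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK)
                                      >> FPC_FLAGS_SHIFT);              */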
15402 /* Return the vector mode to be used for inner mode MODE when doing vectorization.  */
15404 static machine_mode
15405 s390_preferred_simd_mode (machine_mode mode)
15425 /* Our hardware does not require vectors to be strictly aligned. */
15427 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15428 const_tree type ATTRIBUTE_UNUSED,
15429 int misalignment ATTRIBUTE_UNUSED,
15430 bool is_packed ATTRIBUTE_UNUSED)
15435 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15439 /* The vector ABI requires vector types to be aligned on an 8 byte
15440 boundary (our stack alignment).  However, we allow this to be
15441 overridden by the user, although this definitely breaks the ABI. */
15442 static HOST_WIDE_INT
15443 s390_vector_alignment (const_tree type)
15445 if (!TARGET_VX_ABI)
15446 return default_vector_alignment (type);
15448 if (TYPE_USER_ALIGN (type))
15449 return TYPE_ALIGN (type);
15451 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
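/* Editorial worked example: TYPE_SIZE and the result are in bits, so a
   16-byte (128-bit) vector type is aligned to MIN (64, 128) == 64 bits
   == 8 bytes, the stack alignment mentioned above, while a 4-byte
   vector keeps its natural 32-bit alignment.  */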
15454 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15455 /* Implement TARGET_ASM_FILE_START. */
15457 s390_asm_file_start (void)
15459 default_file_start ();
15460 s390_asm_output_machine_for_arch (asm_out_file);
15464 /* Implement TARGET_ASM_FILE_END. */
15466 s390_asm_file_end (void)
15468 #ifdef HAVE_AS_GNU_ATTRIBUTE
15469 varpool_node *vnode;
15470 cgraph_node *cnode;
15472 FOR_EACH_VARIABLE (vnode)
15473 if (TREE_PUBLIC (vnode->decl))
15474 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15476 FOR_EACH_FUNCTION (cnode)
15477 if (TREE_PUBLIC (cnode->decl))
15478 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15481 if (s390_vector_abi != 0)
15482 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15485 file_end_indicate_exec_stack ();
15487 if (flag_split_stack)
15488 file_end_indicate_split_stack ();
15491 /* Return true if TYPE is a vector bool type. */
15493 s390_vector_bool_type_p (const_tree type)
15495 return TYPE_VECTOR_OPAQUE (type);
15498 /* Return the diagnostic message string if the binary operation OP is
15499 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15501 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15503 bool bool1_p, bool2_p;
15507 machine_mode mode1, mode2;
15509 if (!TARGET_ZVECTOR)
15512 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15515 bool1_p = s390_vector_bool_type_p (type1);
15516 bool2_p = s390_vector_bool_type_p (type2);
15518 /* Mixing signed and unsigned types is forbidden for all vector operations.  */
15520 if (!bool1_p && !bool2_p
15521 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15522 return N_("types differ in signedness");
15524 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15525 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15526 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15527 || op == ROUND_DIV_EXPR);
15528 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15529 || op == EQ_EXPR || op == NE_EXPR);
15531 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15532 return N_("binary operator does not support two vector bool operands");
15534 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15535 return N_("binary operator does not support vector bool operand");
15537 mode1 = TYPE_MODE (type1);
15538 mode2 = TYPE_MODE (type2);
15540 if (bool1_p != bool2_p && plusminus_p
15541 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15542 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15543 return N_("binary operator does not support mixing vector "
15544 "bool with floating point vector operands");
15549 /* Implement TARGET_C_EXCESS_PRECISION.
15551 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15552 double on s390, causing operations on float_t to operate in a higher
15553 precision than is necessary. However, it is not the case that SFmode
15554 operations have implicit excess precision, and we generate more optimal
15555 code if we let the compiler know no implicit extra precision is added.
15557 That means when we are compiling with -fexcess-precision=fast, the value
15558 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15559 float_t (though they would be correct for -fexcess-precision=standard).
15561 A complete fix would modify glibc to remove the unnecessary typedef
15562 of float_t to double. */
15564 static enum flt_eval_method
15565 s390_excess_precision (enum excess_precision_type type)
15569 case EXCESS_PRECISION_TYPE_IMPLICIT:
15570 case EXCESS_PRECISION_TYPE_FAST:
15571 /* The fastest type to promote to will always be the native type,
15572 whether that occurs with implicit excess precision or otherwise.  */
15574 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15575 case EXCESS_PRECISION_TYPE_STANDARD:
15576 /* Otherwise, when we are in a standards compliant mode, to
15577 ensure consistency with the implementation in glibc, report that
15578 float is evaluated to the range and precision of double. */
15579 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15581 gcc_unreachable ();
15583 return FLT_EVAL_METHOD_UNPREDICTABLE;
15586 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
15588 static unsigned HOST_WIDE_INT
15589 s390_asan_shadow_offset (void)
15591 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
15594 /* Initialize GCC target structure. */
15596 #undef TARGET_ASM_ALIGNED_HI_OP
15597 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15598 #undef TARGET_ASM_ALIGNED_DI_OP
15599 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15600 #undef TARGET_ASM_INTEGER
15601 #define TARGET_ASM_INTEGER s390_assemble_integer
15603 #undef TARGET_ASM_OPEN_PAREN
15604 #define TARGET_ASM_OPEN_PAREN ""
15606 #undef TARGET_ASM_CLOSE_PAREN
15607 #define TARGET_ASM_CLOSE_PAREN ""
15609 #undef TARGET_OPTION_OVERRIDE
15610 #define TARGET_OPTION_OVERRIDE s390_option_override
15612 #ifdef TARGET_THREAD_SSP_OFFSET
15613 #undef TARGET_STACK_PROTECT_GUARD
15614 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15617 #undef TARGET_ENCODE_SECTION_INFO
15618 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15620 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15621 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15624 #undef TARGET_HAVE_TLS
15625 #define TARGET_HAVE_TLS true
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef ASM_OUTPUT_DWARF_DTPREL
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s390_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore
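
/* Instantiate the target hook vector.  TARGET_INITIALIZER, provided by
   target-def.h, expands to an initializer built from the TARGET_* macros
   defined above, falling back to the documented defaults for any hook
   this file does not override.  */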
struct gcc_target targetm = TARGET_INITIALIZER;
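
/* Pull in the gengtype-generated garbage-collector root tables for the
   GTY(()) variables declared in this file.  */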
#include "gt-s390.h"