/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2016 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of a DXBR instruction.  */
  const int ddbr;     /* cost of a DDBR instruction.  */
  const int debr;     /* cost of a DEBR instruction.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
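
/* Usage sketch (illustrative comment, not upstream text): the rtx cost
   hooks consult the currently selected table through this macro, e.g.

     total = COSTS_N_INSNS (1) + s390_cost->msgr;

   where s390_cost_pointer is presumably set from the processor_table
   below when the -mtune CPU is selected during option override.  */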
static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};
static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};
static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};
static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};
static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
static struct
{
  const char *const name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "native", PROCESSOR_NATIVE,      NULL }
};
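
/* Illustrative note (added, not upstream text): the -march=/-mtune=
   option strings are matched against the name column above, so e.g.
   -march=z13 selects PROCESSOR_2964_Z13 while, as of this version,
   still reusing the zEC12 cost table; "native" is presumably resolved
   by the driver into a concrete CPU name before it reaches here.  */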
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100
/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Number of the first and last gpr for which slots in the register
     save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
       0 - does not need to be saved at all
      -1 - stack slot  */
#define SAVE_SLOT_NONE   0
#define SAVE_SLOT_STACK -1
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     FPRs.  */
  bool tbegin_p;

  /* For -fsplit-stack support: A stack local which holds a pointer to
     the stack arguments for a function with a variable number of
     arguments.  This is set at the start of the function and is used
     to initialize the overflow_arg_area field of the va_list
     structure.  */
  rtx split_stack_varargs_pointer;
};
/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
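
/* Illustrative example (added comment): given the fpr_bitmap layout
   documented in s390_frame_layout, cfun_set_fpr_save (FPR0_REGNUM + 6)
   sets bit 6 of fpr_bitmap and cfun_fpr_save_p on the same regno then
   yields true.  cfun_save_arg_fprs_p tests only the argument FPRs,
   i.e. f0/f2 on 31 bit (mask 0x03) and f0/f2/f4/f6 on 64 bit
   (mask 0x0f), per the bit-to-fpr mapping above.  */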
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 bytes are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, it might still be
	 ABI relevant due to the changed alignment if it is a struct
	 member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
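
/* Illustrative example (added comment, not upstream text): for

     struct s { __attribute__ ((vector_size (16))) int v; };

   used as a function argument, the record branch above recurses into
   the vector member with in_struct_p == true, and the 16-byte vector
   then records which vector ABI is in effect.  */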
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)		\
  if (s390_builtin_types[INDEX] == NULL)		\
    s390_builtin_types[INDEX] = (!CONST_P) ?		\
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)		\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)		\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)	\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)	\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, BFLAGS, args...)		\
  if (s390_builtin_fn_types[INDEX] == NULL)		\
    s390_builtin_fn_types[INDEX] =			\
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_BUILTIN_##NAME,			\
			    BUILT_IN_MD,				\
			    NULL,					\
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)								\
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,				\
			    NULL,					\
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to
   be passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
	{
	  error("constant argument %d for builtin %qF is out of range (0.."
		HOST_WIDE_INT_PRINT_UNSIGNED ")",
		argnum, decl,
		((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
	{
	  error("constant argument %d for builtin %qF is out of range ("
		HOST_WIDE_INT_PRINT_DEC ".."
		HOST_WIDE_INT_PRINT_DEC ")",
		argnum, decl,
		-((HOST_WIDE_INT)1 << (bitwidth - 1)),
		((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
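
/* Worked example (illustrative comment): for an O_U4 operand the
   bitwidth is 4, so the accepted range is 0 .. (1 << 4) - 1 = 15;
   an O_S8 operand accepts -128 .. 127.  */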
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("Builtin %qF is not supported without -mhtm "
		 "(default with -march=zEC12 and higher).", fndecl);
	  return const0_rtx;
	}
      if ((bflags & B_VX) && !TARGET_VX)
	{
	  error ("Builtin %qF is not supported without -mvx "
		 "(default with -march=z13 and higher).", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("Unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");
  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }
  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}
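      /* Illustrative note (added example, not upstream code): for a
	 V4SI operand the AND above computes "selector & 3", so an
	 element index of 5 wraps around to 1.  */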
      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins but FIXME we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
					       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128 required to have vector unsigned char
	 arguments is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}
      else if (GET_MODE (op[arity]) == insn_op->mode
	       || GET_MODE (op[arity]) == VOIDmode
	       || (insn_op->predicate == address_operand
		   && GET_MODE (op[arity]) == Pmode))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }
  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (expr, s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (expr2, s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
      break;
    default: break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
  /* End element.  */
  { NULL,        0, 0, false, false, false, NULL, false }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}
static machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
    case SFmode:
    case DFmode:
    case TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case CCSmode:
    case CCUmode:
    case CCTmode:
    case CCSRmode:
    case CCURmode:
    case CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return true;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
    case CCVEQmode:
    case CCVHmode:
    case CCVHUmode:
    case CCVFHmode:
    case CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode)
	return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16) -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
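
/* Worked example (illustrative comment): with op1 = 0x90 (16 + 128)
   and op2 = 0x10, bit1 = exact_log2 (0x10) = 4 and
   bit0 = exact_log2 (0x90 ^ 0x10) = exact_log2 (0x80) = 7; since
   bit0 > bit1, CCT1mode is returned, matching the CCT1 case in the
   comment above.  */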
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode))
    {
      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
	 s390_emit_compare or s390_canonicalize_comparison will take
	 care of it.  */
      switch (code)
	{
	case EQ:
	case NE:
	  return CCVEQmode;
	case GT:
	case UNLE:
	  return CCVFHmode;
	case GE:
	case UNLT:
	  return CCVFHEmode;
	default:
	  ;
	}
    }

  switch (code)
    {
      case EQ:
      case NE:
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	  return CCAPmode;
	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	     || GET_CODE (op1) == NEG)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCLmode;

	if (GET_CODE (op0) == AND)
	  {
	    /* Check whether we can potentially do it via TM.  */
	    machine_mode ccmode;
	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	    if (ccmode != VOIDmode)
	      {
		/* Relax CCTmode to CCZmode to allow fall-back to AND
		   if that turns out to be beneficial.  */
		return ccmode == CCTmode ? CCZmode : ccmode;
	      }
	  }

	if (register_operand (op0, HImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	  return CCT3mode;
	if (register_operand (op0, QImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	  return CCT3mode;

	return CCZmode;

      case LE:
      case LT:
      case GE:
      case GT:
	/* The only overflow condition of NEG and ABS happens when
	   -INT_MAX is used as parameter, which stays negative.  So
	   we have an overflow from a positive value to a negative.
	   Using CCAP mode the resulting cc can be used for comparisons.  */
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;

	/* If constants are involved in an add instruction it is possible to use
	   the resulting cc for comparisons with zero.  Knowing the sign of the
	   constant the overflow behavior gets predictable.  e.g.:
	     int a, b; if ((b = a + c) > 0)
	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		    /* Avoid INT32_MIN on 32 bit.  */
		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	  {
	    if (INTVAL (XEXP((op0), 1)) < 0)
	      return CCANmode;
	    else
	      return CCAPmode;
	  }
	/* Fall through.  */
      case UNORDERED:
      case ORDERED:
      case UNEQ:
      case UNLE:
      case UNLT:
      case UNGE:
      case UNGT:
      case LTGT:
	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCSRmode;
	return CCSmode;

      case LTU:
      case GEU:
	if (GET_CODE (op0) == PLUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL1mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      case LEU:
      case GTU:
	if (GET_CODE (op0) == MINUS
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCL2mode;

	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	    && GET_CODE (op1) != CONST_INT)
	  return CCURmode;
	return CCUmode;

      default:
	gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }
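
  /* Worked example (illustrative comment): extracting len = 2 bits at
     pos = 4 from a SImode value (modesize = 32) yields
     block = 0x3 << 26, i.e. the AND mask selecting exactly those two
     bits.  */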
  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }
  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int)new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case CCZmode:
	case CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ; break;
	    case NE: new_code = NE; break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int)new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

  /* Using the scalar variants of vector instructions for 64 bit FP
     comparisons might require swapping the operands.  */
  if (TARGET_VX
      && register_operand (*op0, DFmode)
      && register_operand (*op1, DFmode)
      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
    {
      rtx tmp;

      switch (*code)
	{
	case LT:   *code = GT;   break;
	case LE:   *code = GE;   break;
	case UNGT: *code = UNLE; break;
	case UNGE: *code = UNLT; break;
	default: ;
	}
      tmp = *op0; *op0 = *op1; *op1 = tmp;
    }
}
/* Helper function for s390_emit_compare.  If possible emit a 64 bit
   FP compare using the single element variant of vector instructions.
   Replace CODE with the comparison code to be used in the CC reg
   compare and return the condition code register RTX in CC.  */

static bool
s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
				rtx *cc)
{
  machine_mode cmp_mode;
  bool swap_p = false;

  switch (*code)
    {
    case EQ:   cmp_mode = CCVEQmode;  break;
    case NE:   cmp_mode = CCVEQmode;  break;
    case GT:   cmp_mode = CCVFHmode;  break;
    case GE:   cmp_mode = CCVFHEmode; break;
    case UNLE: cmp_mode = CCVFHmode;  break;
    case UNLT: cmp_mode = CCVFHEmode; break;
    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
    default: return false;
    }

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }

  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
	       gen_rtvec (2,
			  gen_rtx_SET (*cc,
				       gen_rtx_COMPARE (cmp_mode, cmp1,
							cmp2)),
			  gen_rtx_CLOBBER (VOIDmode,
					   gen_rtx_SCRATCH (V2DImode)))));
  return true;
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode)
      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
    {
      /* Work has been done by s390_expand_vec_compare_scalar already.  */
    }
  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
	 compare_and_swap pattern already computed the result and the
	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
			    const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	default: return -1;
	}
      break;

    case CCT1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC1;
	case NE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case CCT2mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC2;
	case NE: return CC0 | CC1 | CC3;
	default: return -1;
	}
      break;

    case CCT3mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC3;
	case NE: return CC0 | CC1 | CC2;
	default: return -1;
	}
      break;

    case CCLmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC2;
	case NE: return CC1 | CC3;
	default: return -1;
	}
      break;

    case CCL1mode:
      switch (GET_CODE (code))
	{
	case LTU: return CC2 | CC3;  /* carry */
	case GEU: return CC0 | CC1;  /* no carry */
	default: return -1;
	}
      break;

    case CCL2mode:
      switch (GET_CODE (code))
	{
	case GTU: return CC0 | CC1;  /* borrow */
	case LEU: return CC2 | CC3;  /* no borrow */
	default: return -1;
	}
      break;

    case CCL3mode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0 | CC2;
	case NE:  return CC1 | CC3;
	case LTU: return CC1;
	case GTU: return CC3;
	case LEU: return CC1 | CC2;
	case GEU: return CC2 | CC3;
	default: return -1;
	}

    case CCUmode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0;
	case NE:  return CC1 | CC2 | CC3;
	case LTU: return CC1;
	case GTU: return CC2;
	case LEU: return CC0 | CC1;
	case GEU: return CC0 | CC2;
	default: return -1;
	}
      break;

    case CCURmode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0;
	case NE:  return CC2 | CC1 | CC3;
	case LTU: return CC2;
	case GTU: return CC1;
	case LEU: return CC0 | CC2;
	case GEU: return CC0 | CC1;
	default: return -1;
	}
      break;

    case CCAPmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1 | CC3;
	case GT: return CC2;
	case LE: return CC0 | CC1 | CC3;
	case GE: return CC0 | CC2;
	default: return -1;
	}
      break;

    case CCANmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1;
	case GT: return CC2 | CC3;
	case LE: return CC0 | CC1;
	case GE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case CCSmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1;
	case GT: return CC2;
	case LE: return CC0 | CC1;
	case GE: return CC0 | CC2;
	case UNORDERED: return CC3;
	case ORDERED: return CC0 | CC1 | CC2;
	case UNEQ: return CC0 | CC3;
	case UNLT: return CC1 | CC3;
	case UNGT: return CC2 | CC3;
	case UNLE: return CC0 | CC1 | CC3;
	case UNGE: return CC0 | CC2 | CC3;
	case LTGT: return CC1 | CC2;
	default: return -1;
	}
      break;

    case CCSRmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC2 | CC1 | CC3;
	case LT: return CC2;
	case GT: return CC1;
	case LE: return CC0 | CC2;
	case GE: return CC0 | CC1;
	case UNORDERED: return CC3;
	case ORDERED: return CC0 | CC2 | CC1;
	case UNEQ: return CC0 | CC3;
	case UNLT: return CC2 | CC3;
	case UNGT: return CC1 | CC3;
	case UNLE: return CC0 | CC2 | CC3;
	case UNGE: return CC0 | CC1 | CC3;
	case LTGT: return CC2 | CC1;
	default: return -1;
	}
      break;

      /* Vector comparison modes.  */
    case CCVEQmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC3;
	default: return -1;
	}

    case CCVEQANYmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC1;
	case NE: return CC3 | CC1;
	default: return -1;
	}

      /* Integer vector compare modes.  */
    case CCVHmode:
      switch (GET_CODE (code))
	{
	case GT: return CC0;
	case LE: return CC3;
	default: return -1;
	}

    case CCVHANYmode:
      switch (GET_CODE (code))
	{
	case GT: return CC0 | CC1;
	case LE: return CC3 | CC1;
	default: return -1;
	}

    case CCVHUmode:
      switch (GET_CODE (code))
	{
	case GTU: return CC0;
	case LEU: return CC3;
	default: return -1;
	}

    case CCVHUANYmode:
      switch (GET_CODE (code))
	{
	case GTU: return CC0 | CC1;
	case LEU: return CC3 | CC1;
	default: return -1;
	}

      /* FP vector compare modes.  */
    case CCVFHmode:
      switch (GET_CODE (code))
	{
	case GT:   return CC0;
	case UNLE: return CC3;
	default: return -1;
	}

    case CCVFHANYmode:
      switch (GET_CODE (code))
	{
	case GT:   return CC0 | CC1;
	case UNLE: return CC3 | CC1;
	default: return -1;
	}

    case CCVFHEmode:
      switch (GET_CODE (code))
	{
	case GE:   return CC0;
	case UNLT: return CC3;
	default: return -1;
	}

    case CCVFHEANYmode:
      switch (GET_CODE (code))
	{
	case GE:   return CC0 | CC1;
	case UNLT: return CC3 | CC1;
	default: return -1;
	}

    case CCRAWmode:
      switch (GET_CODE (code))
	{
	case EQ:
	  return INTVAL (XEXP (code, 1));
	case NE:
	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
	default:
	  return -1;
	}

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
	      && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
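
/* Worked example (illustrative comment): an EQ test in CCSmode yields
   mask CC0 = 8, and mnemonic[8] is "e"; inverting gives 8 ^ 15 = 7,
   i.e. "ne".  */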
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
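
/* Worked example (illustrative comment): for op = 0x00120000 scanned
   in QImode parts with def = 0, the bytes are examined from the least
   significant end and 0x12 is returned as the only non-zero part.  */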
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  machine_mode mode,
		  machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
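
/* Worked example (illustrative comment): for a SImode constant
   0x00ff0000 inspected in QImode parts with def = 0, only part i = 2
   (counting from the least significant byte) differs from zero, so
   4 - 1 - 2 = 1 is returned; the result numbers parts from the most
   significant end.  */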
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - 1;
  int highbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - size;
  unsigned HOST_WIDE_INT bitmask = 1ULL;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
	/* Look for the rightmost bit of a contiguous range of ones.  */
	if (bitmask & in)
	  /* Found it.  */
	  end = start;
      }
    else
      {
	/* Look for the first zero bit after the range of ones.  */
	if (! (bitmask & in))
	  /* Found it.  */
	  break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~(0ULL) >> highbit) & (~(0ULL) << (lowbit - start + 1)));
      if (mask & in)
	/* There are more bits set beyond the first range of one bits.  */
	return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
			   int size, int *start, int *end)
{
  int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~(0ULL)) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a contiguous bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (! wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
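
/* Worked example (illustrative comment): in = 0xff000000000000ff with
   size = 64: the first nowrap test fails, but ~in is contiguous
   (big-endian bit positions 8..55), so the wrap case yields
   *start = 56 and *end = 7, i.e. *start > *end as documented
   above.  */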
2366 /* Return true if OP contains the same contiguous bitfield in *all*
2367 its elements. START and END can be used to obtain the start and
2368 end position of the bitfield.
2370 START/STOP give the position of the first/last bit of the bitfield
2371 counting from the lowest order bit starting with zero. In order to
2372 use these values for S/390 instructions this has to be converted to
2373 "bits big endian" style. */
2376 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2378 unsigned HOST_WIDE_INT mask;
2383 gcc_assert (!!start == !!end);
2384 if (!const_vec_duplicate_p (op, &elt)
2385 || !CONST_INT_P (elt))
2388 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2390 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2394 mask = UINTVAL (elt);
2396 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2401 int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
2403 *start -= (bs - size);
2404 *end -= (bs - size);
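/* Worked example: for a V4SI constant with all elements equal to
   0x00fff000, SIZE is 32 and the 64-bit scan finds the run at
   positions 40..51 counted from the most significant of the 64
   bits; subtracting bs - size = 32 rebases this to 8..19 within
   the 32-bit element -- the form expected by vgm.  */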
2412 /* Return true if C consists only of byte chunks being either 0 or
2413 0xff. If MASK is non-NULL, a byte mask is generated that is
2414 suitable for the vector generate byte mask (vgbm) instruction. */
2417 s390_bytemask_vector_p (rtx op, unsigned *mask)
2420 unsigned tmp_mask = 0;
2421 int nunit, unit_size;
2423 if (!VECTOR_MODE_P (GET_MODE (op))
2424 || GET_CODE (op) != CONST_VECTOR
2425 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2428 nunit = GET_MODE_NUNITS (GET_MODE (op));
2429 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2431 for (i = 0; i < nunit; i++)
2433 unsigned HOST_WIDE_INT c;
2436 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2439 c = UINTVAL (XVECEXP (op, 0, i));
2440 for (j = 0; j < unit_size; j++)
2442 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2444 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2445 c = c >> BITS_PER_UNIT;
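/* Worked example: for a V2DI constant
   { 0xffffffff00000000, 0x00000000000000ff } the loop produces the
   mask 0xf001 -- one mask bit per byte, most significant byte
   first -- which vgbm later expands back into 0x00/0xff bytes.  */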
2455 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2456 equivalent to a shift followed by the AND. In particular, CONTIG
2457 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2458 for ROTL indicate a rotate to the right. */
2461 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2466 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2470 return (64 - end >= rotl);
2473 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2475 rotl = -rotl + (64 - bitsize);
2476 return (start >= rotl);
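/* Worked example: CONTIG = 0xff00 with BITSIZE = 64 and ROTL = 8 is
   accepted: the range lies at start/end = 48/55 counted from the
   most significant bit, and 64 - 55 = 9 >= 8, so no wrapped-around
   bits can reach the mask and a plain shift is equivalent.  With
   CONTIG = 0xff (end = 63) the same ROTL is rejected, since the low
   byte would then receive bits rotated in from the top.  */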
2480 /* Check whether we can (and want to) split a double-word
2481 move in mode MODE from SRC to DST into two single-word
2482 moves, moving the subword FIRST_SUBWORD first. */
2485 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2487 /* Floating point and vector registers cannot be split. */
2488 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2491 /* We don't need to split if operands are directly accessible. */
2492 if (s_operand (src, mode) || s_operand (dst, mode))
2495 /* Non-offsettable memory references cannot be split. */
2496 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2497 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2500 /* Moving the first subword must not clobber a register
2501 needed to move the second subword. */
2502 if (register_operand (dst, mode))
2504 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2505 if (reg_overlap_mentioned_p (subreg, src))
2512 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2513 and [MEM2, MEM2 + SIZE] do overlap and false otherwise.
2517 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2519 rtx addr1, addr2, addr_delta;
2520 HOST_WIDE_INT delta;
2522 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2528 addr1 = XEXP (mem1, 0);
2529 addr2 = XEXP (mem2, 0);
2531 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2533 /* This overlapping check is used by peepholes merging memory block operations.
2534 Overlapping operations would otherwise be recognized by the S/390 hardware
2535 and would fall back to a slower implementation. Allowing overlapping
2536 operations would lead to slow code but not to wrong code. Therefore we are
2537 somewhat optimistic if we cannot prove that the memory blocks are distinct.
2539 That's why we return false here although this may accept operations on
2540 overlapping memory areas. */
2541 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2544 delta = INTVAL (addr_delta);
2547 || (delta > 0 && delta < size)
2548 || (delta < 0 && -delta < size))
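/* Worked example: for MEM1 at (plus r1 8) and MEM2 at (plus r1 16)
   with SIZE = 16, the difference simplifies to the CONST_INT 8 and
   0 < 8 < 16, so the blocks provably overlap and true is returned.
   If the two addresses involve unrelated registers, the delta does
   not simplify to a constant and we optimistically return false.  */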
2554 /* Check whether the address of memory reference MEM2 equals exactly
2555 the address of memory reference MEM1 plus DELTA. Return true if
2556 we can prove this to be the case, false otherwise. */
2559 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2561 rtx addr1, addr2, addr_delta;
2563 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2566 addr1 = XEXP (mem1, 0);
2567 addr2 = XEXP (mem2, 0);
2569 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2570 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2576 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2579 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2582 machine_mode wmode = mode;
2583 rtx dst = operands[0];
2584 rtx src1 = operands[1];
2585 rtx src2 = operands[2];
2588 /* If we cannot handle the operation directly, use a temp register. */
2589 if (!s390_logical_operator_ok_p (operands))
2590 dst = gen_reg_rtx (mode);
2592 /* QImode and HImode patterns make sense only if we have a destination
2593 in memory. Otherwise perform the operation in SImode. */
2594 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2597 /* Widen operands if required. */
2600 if (GET_CODE (dst) == SUBREG
2601 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2603 else if (REG_P (dst))
2604 dst = gen_rtx_SUBREG (wmode, dst, 0);
2606 dst = gen_reg_rtx (wmode);
2608 if (GET_CODE (src1) == SUBREG
2609 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2611 else if (GET_MODE (src1) != VOIDmode)
2612 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2614 if (GET_CODE (src2) == SUBREG
2615 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2617 else if (GET_MODE (src2) != VOIDmode)
2618 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2621 /* Emit the instruction. */
2622 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2623 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2624 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2626 /* Fix up the destination if needed. */
2627 if (dst != operands[0])
2628 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2631 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2634 s390_logical_operator_ok_p (rtx *operands)
2636 /* If the destination operand is in memory, it needs to coincide
2637 with one of the source operands. After reload, it has to be
2638 the first source operand. */
2639 if (GET_CODE (operands[0]) == MEM)
2640 return rtx_equal_p (operands[0], operands[1])
2641 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2646 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2647 operand IMMOP to switch from SS to SI type instructions. */
2650 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2652 int def = code == AND ? -1 : 0;
2656 gcc_assert (GET_CODE (*memop) == MEM);
2657 gcc_assert (!MEM_VOLATILE_P (*memop));
2659 mask = s390_extract_part (*immop, QImode, def);
2660 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2661 gcc_assert (part >= 0);
2663 *memop = adjust_address (*memop, QImode, part);
2664 *immop = gen_int_mode (mask, QImode);
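/* Worked example: an SImode AND of a memory word with the immediate
   0xffffff0f changes only the least significant byte.
   s390_single_part returns 3 (byte index counted from the most
   significant byte), so the access is narrowed to byte 3 and the
   immediate to 0x0f: the SS-type operation becomes a single
   SI-type NI.  */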
2668 /* How to allocate a 'struct machine_function'. */
2670 static struct machine_function *
2671 s390_init_machine_status (void)
2673 return ggc_cleared_alloc<machine_function> ();
2676 /* Map for smallest class containing reg regno. */
2678 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2679 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2680 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2681 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2682 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2683 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2684 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2685 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2686 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2687 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2688 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2689 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2690 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2691 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2692 VEC_REGS, VEC_REGS /* 52 */
2695 /* Return attribute type of insn. */
2697 static enum attr_type
2698 s390_safe_attr_type (rtx_insn *insn)
2700 if (recog_memoized (insn) >= 0)
2701 return get_attr_type (insn);
2706 /* Return true if DISP is a valid short displacement. */
2709 s390_short_displacement (rtx disp)
2711 /* No displacement is OK. */
2715 /* Without the long displacement facility we don't need to
2716 distinguish between long and short displacements. */
2717 if (!TARGET_LONG_DISPLACEMENT)
2720 /* Integer displacement in range. */
2721 if (GET_CODE (disp) == CONST_INT)
2722 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2724 /* GOT offset is not OK, the GOT can be large. */
2725 if (GET_CODE (disp) == CONST
2726 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2727 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2728 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2731 /* All other symbolic constants are literal pool references,
2732 which are OK as the literal pool must be small. */
2733 if (GET_CODE (disp) == CONST)
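/* For reference: a short displacement is an unsigned 12-bit value
   (0..4095), while the long displacement facility adds a signed
   20-bit range (-524288..524287) used by the Y-form instructions
   such as LY and STY.  */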
2739 /* Decompose a RTL expression ADDR for a memory address into
2740 its components, returned in OUT.
2742 Returns false if ADDR is not a valid memory address, true
2743 otherwise. If OUT is NULL, don't return the components,
2744 but check for validity only.
2746 Note: Only addresses in canonical form are recognized.
2747 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2748 canonical form so that they will be recognized. */
2751 s390_decompose_address (rtx addr, struct s390_address *out)
2753 HOST_WIDE_INT offset = 0;
2754 rtx base = NULL_RTX;
2755 rtx indx = NULL_RTX;
2756 rtx disp = NULL_RTX;
2758 bool pointer = false;
2759 bool base_ptr = false;
2760 bool indx_ptr = false;
2761 bool literal_pool = false;
2763 /* We may need to substitute the literal pool base register into the address
2764 below. However, at this point we do not know which register is going to
2765 be used as base, so we substitute the arg pointer register. This is going
2766 to be treated as holding a pointer below -- it shouldn't be used for any other purpose. */
2768 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2770 /* Decompose address into base + index + displacement. */
2772 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2775 else if (GET_CODE (addr) == PLUS)
2777 rtx op0 = XEXP (addr, 0);
2778 rtx op1 = XEXP (addr, 1);
2779 enum rtx_code code0 = GET_CODE (op0);
2780 enum rtx_code code1 = GET_CODE (op1);
2782 if (code0 == REG || code0 == UNSPEC)
2784 if (code1 == REG || code1 == UNSPEC)
2786 indx = op0; /* index + base */
2792 base = op0; /* base + displacement */
2797 else if (code0 == PLUS)
2799 indx = XEXP (op0, 0); /* index + base + disp */
2800 base = XEXP (op0, 1);
2811 disp = addr; /* displacement */
2813 /* Extract integer part of displacement. */
2817 if (GET_CODE (disp) == CONST_INT)
2819 offset = INTVAL (disp);
2822 else if (GET_CODE (disp) == CONST
2823 && GET_CODE (XEXP (disp, 0)) == PLUS
2824 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2826 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2827 disp = XEXP (XEXP (disp, 0), 0);
2831 /* Strip off CONST here to avoid special case tests later. */
2832 if (disp && GET_CODE (disp) == CONST)
2833 disp = XEXP (disp, 0);
2835 /* We can convert literal pool addresses to
2836 displacements by basing them off the base register. */
2837 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2839 /* Either base or index must be free to hold the base register. */
2841 base = fake_pool_base, literal_pool = true;
2843 indx = fake_pool_base, literal_pool = true;
2847 /* Mark up the displacement. */
2848 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2849 UNSPEC_LTREL_OFFSET);
2852 /* Validate base register. */
2855 if (GET_CODE (base) == UNSPEC)
2856 switch (XINT (base, 1))
2860 disp = gen_rtx_UNSPEC (Pmode,
2861 gen_rtvec (1, XVECEXP (base, 0, 0)),
2862 UNSPEC_LTREL_OFFSET);
2866 base = XVECEXP (base, 0, 1);
2869 case UNSPEC_LTREL_BASE:
2870 if (XVECLEN (base, 0) == 1)
2871 base = fake_pool_base, literal_pool = true;
2873 base = XVECEXP (base, 0, 1);
2880 if (!REG_P (base) || GET_MODE (base) != Pmode)
2883 if (REGNO (base) == STACK_POINTER_REGNUM
2884 || REGNO (base) == FRAME_POINTER_REGNUM
2885 || ((reload_completed || reload_in_progress)
2886 && frame_pointer_needed
2887 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2888 || REGNO (base) == ARG_POINTER_REGNUM
2890 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2891 pointer = base_ptr = true;
2893 if ((reload_completed || reload_in_progress)
2894 && base == cfun->machine->base_reg)
2895 pointer = base_ptr = literal_pool = true;
2898 /* Validate index register. */
2901 if (GET_CODE (indx) == UNSPEC)
2902 switch (XINT (indx, 1))
2906 disp = gen_rtx_UNSPEC (Pmode,
2907 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2908 UNSPEC_LTREL_OFFSET);
2912 indx = XVECEXP (indx, 0, 1);
2915 case UNSPEC_LTREL_BASE:
2916 if (XVECLEN (indx, 0) == 1)
2917 indx = fake_pool_base, literal_pool = true;
2919 indx = XVECEXP (indx, 0, 1);
2926 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2929 if (REGNO (indx) == STACK_POINTER_REGNUM
2930 || REGNO (indx) == FRAME_POINTER_REGNUM
2931 || ((reload_completed || reload_in_progress)
2932 && frame_pointer_needed
2933 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2934 || REGNO (indx) == ARG_POINTER_REGNUM
2936 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2937 pointer = indx_ptr = true;
2939 if ((reload_completed || reload_in_progress)
2940 && indx == cfun->machine->base_reg)
2941 pointer = indx_ptr = literal_pool = true;
2944 /* Prefer to use pointer as base, not index. */
2945 if (base && indx && !base_ptr
2946 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2953 /* Validate displacement. */
2956 /* If virtual registers are involved, the displacement will change later
2957 anyway as the virtual registers get eliminated. This could make a
2958 valid displacement invalid, but it is more likely to make an invalid
2959 displacement valid, because we sometimes access the register save area
2960 via negative offsets to one of those registers.
2961 Thus we don't check the displacement for validity here. If after
2962 elimination the displacement turns out to be invalid after all,
2963 this is fixed up by reload in any case. */
2964 /* LRA always keeps the displacements up to date, and we need to
2965 know that the displacement is correct during all of LRA, not only
2966 at the final elimination. */
2968 || (base != arg_pointer_rtx
2969 && indx != arg_pointer_rtx
2970 && base != return_address_pointer_rtx
2971 && indx != return_address_pointer_rtx
2972 && base != frame_pointer_rtx
2973 && indx != frame_pointer_rtx
2974 && base != virtual_stack_vars_rtx
2975 && indx != virtual_stack_vars_rtx))
2976 if (!DISP_IN_RANGE (offset))
2981 /* All the special cases are pointers. */
2984 /* In the small-PIC case, the linker converts @GOT
2985 and @GOTNTPOFF offsets to possible displacements. */
2986 if (GET_CODE (disp) == UNSPEC
2987 && (XINT (disp, 1) == UNSPEC_GOT
2988 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2994 /* Accept pool label offsets. */
2995 else if (GET_CODE (disp) == UNSPEC
2996 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2999 /* Accept literal pool references. */
3000 else if (GET_CODE (disp) == UNSPEC
3001 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3003 /* In case CSE pulled a non-literal-pool reference out of
3004 the pool we have to reject the address. This is
3005 especially important when loading the GOT pointer on
3006 non-zarch CPUs. In this case the literal pool contains an lt
3007 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3008 will most likely exceed the displacement. */
3009 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3010 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3013 orig_disp = gen_rtx_CONST (Pmode, disp);
3016 /* If we have an offset, make sure it does not
3017 exceed the size of the constant pool entry. */
3018 rtx sym = XVECEXP (disp, 0, 0);
3019 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3022 orig_disp = plus_constant (Pmode, orig_disp, offset);
3037 out->disp = orig_disp;
3038 out->pointer = pointer;
3039 out->literal_pool = literal_pool;
3045 /* Decompose a RTL expression OP for an address style operand into its
3046 components, and return the base register in BASE and the offset in
3047 OFFSET. While OP looks like an address it is never supposed to be used as such.
3050 Return true if OP is a valid address operand, false if not. */
3053 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3054 HOST_WIDE_INT *offset)
3058 /* We can have an integer constant, an address register,
3059 or a sum of the two. */
3060 if (CONST_SCALAR_INT_P (op))
3065 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3070 while (op && GET_CODE (op) == SUBREG)
3071 op = SUBREG_REG (op);
3073 if (op && GET_CODE (op) != REG)
3078 if (off == NULL_RTX)
3080 else if (CONST_INT_P (off))
3081 *offset = INTVAL (off);
3082 else if (CONST_WIDE_INT_P (off))
3083 /* The offset will be truncated to 12 bits anyway, so just take
3084 the lowest-order chunk of the wide int. */
3085 *offset = CONST_WIDE_INT_ELT (off, 0);
3096 /* Return true if CODE is a valid address without index. */
3099 s390_legitimate_address_without_index_p (rtx op)
3101 struct s390_address addr;
3103 if (!s390_decompose_address (XEXP (op, 0), &addr))
3112 /* Return TRUE if ADDR is an operand valid for a load/store relative
3113 instruction. Be aware that the alignment of the operand needs to
3114 be checked separately.
3115 Valid addresses are single references or a sum of a reference and a
3116 constant integer. Return these parts in SYMREF and ADDEND. You can
3117 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3118 values. Literal pool references are *not* considered symbol references. */
3122 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3124 HOST_WIDE_INT tmpaddend = 0;
3126 if (GET_CODE (addr) == CONST)
3127 addr = XEXP (addr, 0);
3129 if (GET_CODE (addr) == PLUS)
3131 if (!CONST_INT_P (XEXP (addr, 1)))
3134 tmpaddend = INTVAL (XEXP (addr, 1));
3135 addr = XEXP (addr, 0);
3138 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3139 || (GET_CODE (addr) == UNSPEC
3140 && (XINT (addr, 1) == UNSPEC_GOTENT
3141 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3146 *addend = tmpaddend;
3153 /* Return true if the address in OP is valid for constraint letter C
3154 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3155 pool MEMs should be accepted. Only the Q, R, S, T constraint
3156 letters are allowed for C. */
3159 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3161 struct s390_address addr;
3162 bool decomposed = false;
3164 /* This check makes sure that no symbolic addresses (except
3165 literal pool references) are accepted by the R or T constraints. */
3166 if (s390_loadrelative_operand_p (op, NULL, NULL))
3169 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3172 if (!s390_decompose_address (op, &addr))
3174 if (addr.literal_pool)
3179 /* With reload, we sometimes get intermediate address forms that are
3180 actually invalid as-is, but we need to accept them in the most
3181 generic cases below ('R' or 'T'), since reload will in fact fix
3182 them up. LRA behaves differently here; we never see such forms,
3183 but on the other hand, we need to strictly reject every invalid
3184 address form. Perform this check right up front. */
3185 if (lra_in_progress)
3187 if (!decomposed && !s390_decompose_address (op, &addr))
3194 case 'Q': /* no index short displacement */
3195 if (!decomposed && !s390_decompose_address (op, &addr))
3199 if (!s390_short_displacement (addr.disp))
3203 case 'R': /* with index short displacement */
3204 if (TARGET_LONG_DISPLACEMENT)
3206 if (!decomposed && !s390_decompose_address (op, &addr))
3208 if (!s390_short_displacement (addr.disp))
3211 /* Any invalid address here will be fixed up by reload,
3212 so accept it for the most generic constraint. */
3215 case 'S': /* no index long displacement */
3216 if (!decomposed && !s390_decompose_address (op, &addr))
3222 case 'T': /* with index long displacement */
3223 /* Any invalid address here will be fixed up by reload,
3224 so accept it for the most generic constraint. */
3234 /* Evaluates constraint strings described by the regular expression
3235 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3236 the constraint given in STR, and 0 otherwise. */
3239 s390_mem_constraint (const char *str, rtx op)
3246 /* Check for offsettable variants of memory constraints. */
3247 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3249 if ((reload_completed || reload_in_progress)
3250 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3252 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3254 /* Check for non-literal-pool variants of memory constraints. */
3257 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3262 if (GET_CODE (op) != MEM)
3264 return s390_check_qrst_address (c, XEXP (op, 0), true);
3266 /* Simply check for the basic form of a shift count. Reload will
3267 take care of making sure we have a proper base register. */
3268 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3272 return s390_check_qrst_address (str[1], op, true);
3280 /* Evaluates constraint strings starting with letter O. Input
3281 parameter C is the second letter following the "O" in the constraint
3282 string. Returns 1 if VALUE meets the respective constraint and 0 otherwise. */
3286 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3294 return trunc_int_for_mode (value, SImode) == value;
3298 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3301 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3309 /* Evaluates constraint strings starting with letter N. Parameter STR
3310 contains the letters following letter "N" in the constraint string.
3311 Returns true if VALUE matches the constraint. */
3314 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3316 machine_mode mode, part_mode;
3318 int part, part_goal;
3324 part_goal = str[0] - '0';
3368 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3371 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3374 if (part_goal != -1 && part_goal != part)
3381 /* Returns true if the input parameter VALUE is a float zero. */
3384 s390_float_const_zero_p (rtx value)
3386 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3387 && value == CONST0_RTX (GET_MODE (value)));
3390 /* Implement TARGET_REGISTER_MOVE_COST. */
3393 s390_register_move_cost (machine_mode mode,
3394 reg_class_t from, reg_class_t to)
3396 /* On s390, copying between FPRs and GPRs is expensive. */
3398 /* It becomes somewhat faster once ldgr/lgdr are available. */
3399 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3401 /* ldgr is single cycle. */
3402 if (reg_classes_intersect_p (from, GENERAL_REGS)
3403 && reg_classes_intersect_p (to, FP_REGS))
3405 /* lgdr needs 3 cycles. */
3406 if (reg_classes_intersect_p (to, GENERAL_REGS)
3407 && reg_classes_intersect_p (from, FP_REGS))
3411 /* Otherwise copying is done via memory. */
3412 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3413 && reg_classes_intersect_p (to, FP_REGS))
3414 || (reg_classes_intersect_p (from, FP_REGS)
3415 && reg_classes_intersect_p (to, GENERAL_REGS)))
3421 /* Implement TARGET_MEMORY_MOVE_COST. */
3424 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3425 reg_class_t rclass ATTRIBUTE_UNUSED,
3426 bool in ATTRIBUTE_UNUSED)
3431 /* Compute a (partial) cost for rtx X. Return true if the complete
3432 cost has been computed, and false if subexpressions should be
3433 scanned. In either case, *TOTAL contains the cost result. The
3434 initial value of *TOTAL is the default value computed by
3435 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3436 code of the superexpression of x. */
3439 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3440 int opno ATTRIBUTE_UNUSED,
3441 int *total, bool speed ATTRIBUTE_UNUSED)
3443 int code = GET_CODE (x);
3451 case CONST_WIDE_INT:
3458 if (GET_CODE (XEXP (x, 0)) == AND
3459 && GET_CODE (XEXP (x, 1)) == ASHIFT
3460 && REG_P (XEXP (XEXP (x, 0), 0))
3461 && REG_P (XEXP (XEXP (x, 1), 0))
3462 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3463 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3464 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3465 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3467 *total = COSTS_N_INSNS (2);
3479 *total = COSTS_N_INSNS (1);
3484 *total = COSTS_N_INSNS (1);
3492 rtx left = XEXP (x, 0);
3493 rtx right = XEXP (x, 1);
3494 if (GET_CODE (right) == CONST_INT
3495 && CONST_OK_FOR_K (INTVAL (right)))
3496 *total = s390_cost->mhi;
3497 else if (GET_CODE (left) == SIGN_EXTEND)
3498 *total = s390_cost->mh;
3500 *total = s390_cost->ms; /* msr, ms, msy */
3505 rtx left = XEXP (x, 0);
3506 rtx right = XEXP (x, 1);
3509 if (GET_CODE (right) == CONST_INT
3510 && CONST_OK_FOR_K (INTVAL (right)))
3511 *total = s390_cost->mghi;
3512 else if (GET_CODE (left) == SIGN_EXTEND)
3513 *total = s390_cost->msgf;
3515 *total = s390_cost->msg; /* msgr, msg */
3517 else /* TARGET_31BIT */
3519 if (GET_CODE (left) == SIGN_EXTEND
3520 && GET_CODE (right) == SIGN_EXTEND)
3521 /* mulsidi case: mr, m */
3522 *total = s390_cost->m;
3523 else if (GET_CODE (left) == ZERO_EXTEND
3524 && GET_CODE (right) == ZERO_EXTEND
3525 && TARGET_CPU_ZARCH)
3526 /* umulsidi case: ml, mlr */
3527 *total = s390_cost->ml;
3529 /* Complex calculation is required. */
3530 *total = COSTS_N_INSNS (40);
3536 *total = s390_cost->mult_df;
3539 *total = s390_cost->mxbr;
3550 *total = s390_cost->madbr;
3553 *total = s390_cost->maebr;
3558 /* Negation of the third argument is free: FMSUB. */
3559 if (GET_CODE (XEXP (x, 2)) == NEG)
3561 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3562 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3563 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3570 if (mode == TImode) /* 128 bit division */
3571 *total = s390_cost->dlgr;
3572 else if (mode == DImode)
3574 rtx right = XEXP (x, 1);
3575 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3576 *total = s390_cost->dlr;
3577 else /* 64 by 64 bit division */
3578 *total = s390_cost->dlgr;
3580 else if (mode == SImode) /* 32 bit division */
3581 *total = s390_cost->dlr;
3588 rtx right = XEXP (x, 1);
3589 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3591 *total = s390_cost->dsgfr;
3593 *total = s390_cost->dr;
3594 else /* 64 by 64 bit division */
3595 *total = s390_cost->dsgr;
3597 else if (mode == SImode) /* 32 bit division */
3598 *total = s390_cost->dlr;
3599 else if (mode == SFmode)
3601 *total = s390_cost->debr;
3603 else if (mode == DFmode)
3605 *total = s390_cost->ddbr;
3607 else if (mode == TFmode)
3609 *total = s390_cost->dxbr;
3615 *total = s390_cost->sqebr;
3616 else if (mode == DFmode)
3617 *total = s390_cost->sqdbr;
3619 *total = s390_cost->sqxbr;
3624 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3625 || outer_code == PLUS || outer_code == MINUS
3626 || outer_code == COMPARE)
3631 *total = COSTS_N_INSNS (1);
3632 if (GET_CODE (XEXP (x, 0)) == AND
3633 && GET_CODE (XEXP (x, 1)) == CONST_INT
3634 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3636 rtx op0 = XEXP (XEXP (x, 0), 0);
3637 rtx op1 = XEXP (XEXP (x, 0), 1);
3638 rtx op2 = XEXP (x, 1);
3640 if (memory_operand (op0, GET_MODE (op0))
3641 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3643 if (register_operand (op0, GET_MODE (op0))
3644 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3654 /* Return the cost of an address rtx ADDR. */
3657 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3658 addr_space_t as ATTRIBUTE_UNUSED,
3659 bool speed ATTRIBUTE_UNUSED)
3661 struct s390_address ad;
3662 if (!s390_decompose_address (addr, &ad))
3665 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3668 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3669 otherwise return 0. */
3672 tls_symbolic_operand (rtx op)
3674 if (GET_CODE (op) != SYMBOL_REF)
3676 return SYMBOL_REF_TLS_MODEL (op);
3679 /* Split DImode access register reference REG (on 64-bit) into its constituent
3680 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3681 gen_highpart cannot be used as they assume all registers are word-sized,
3682 while our access registers have only half that size. */
3685 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3687 gcc_assert (TARGET_64BIT);
3688 gcc_assert (ACCESS_REG_P (reg));
3689 gcc_assert (GET_MODE (reg) == DImode);
3690 gcc_assert (!(REGNO (reg) & 1));
3692 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3693 *hi = gen_rtx_REG (SImode, REGNO (reg));
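/* E.g. for the access-register pair starting at hard register 36
   (see regclass_map above) this yields *hi = SImode reg 36 and
   *lo = SImode reg 37: the even register holds the most significant
   half of the DImode value.  */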
3696 /* Return true if OP contains a symbol reference. */
3699 symbolic_reference_mentioned_p (rtx op)
3704 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3707 fmt = GET_RTX_FORMAT (GET_CODE (op));
3708 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3714 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3715 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3719 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3726 /* Return true if OP contains a reference to a thread-local symbol. */
3729 tls_symbolic_reference_mentioned_p (rtx op)
3734 if (GET_CODE (op) == SYMBOL_REF)
3735 return tls_symbolic_operand (op);
3737 fmt = GET_RTX_FORMAT (GET_CODE (op));
3738 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3744 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3745 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3749 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3757 /* Return true if OP is a legitimate general operand when
3758 generating PIC code. It is given that flag_pic is on
3759 and that OP satisfies CONSTANT_P. */
3762 legitimate_pic_operand_p (rtx op)
3764 /* Accept all non-symbolic constants. */
3765 if (!SYMBOLIC_CONST (op))
3768 /* Reject everything else; must be handled
3769 via emit_symbolic_move. */
3773 /* Returns true if the constant value OP is a legitimate general operand.
3774 It is given that OP satisfies CONSTANT_P. */
3777 s390_legitimate_constant_p (machine_mode mode, rtx op)
3779 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3781 if (GET_MODE_SIZE (mode) != 16)
3784 if (!satisfies_constraint_j00 (op)
3785 && !satisfies_constraint_jm1 (op)
3786 && !satisfies_constraint_jKK (op)
3787 && !satisfies_constraint_jxx (op)
3788 && !satisfies_constraint_jyy (op))
3792 /* Accept all non-symbolic constants. */
3793 if (!SYMBOLIC_CONST (op))
3796 /* Accept immediate LARL operands. */
3797 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3800 /* Thread-local symbols are never legal constants. This is
3801 so that emit_call knows that computing such addresses
3802 might require a function call. */
3803 if (TLS_SYMBOLIC_CONST (op))
3806 /* In the PIC case, symbolic constants must *not* be
3807 forced into the literal pool. We accept them here,
3808 so that they will be handled by emit_symbolic_move. */
3812 /* All remaining non-PIC symbolic constants are
3813 forced into the literal pool. */
3817 /* Determine if it's legal to put X into the constant pool. This
3818 is not possible if X contains the address of a symbol that is
3819 not constant (TLS) or not known at final link time (PIC). */
3822 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3824 switch (GET_CODE (x))
3828 case CONST_WIDE_INT:
3830 /* Accept all non-symbolic constants. */
3834 /* Labels are OK iff we are non-PIC. */
3835 return flag_pic != 0;
3838 /* 'Naked' TLS symbol references are never OK,
3839 non-TLS symbols are OK iff we are non-PIC. */
3840 if (tls_symbolic_operand (x))
3843 return flag_pic != 0;
3846 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3849 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3850 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3853 switch (XINT (x, 1))
3855 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3856 case UNSPEC_LTREL_OFFSET:
3864 case UNSPEC_GOTNTPOFF:
3865 case UNSPEC_INDNTPOFF:
3868 /* If the literal pool shares the code section, we put
3869 execute template placeholders into the pool as well. */
3871 return TARGET_CPU_ZARCH;
3883 /* Returns true if the constant value OP is a legitimate general
3884 operand during and after reload. The difference from
3885 legitimate_constant_p is that this function will not accept
3886 a constant that would need to be forced to the literal pool
3887 before it can be used as an operand.
3888 This function accepts all constants which can be loaded directly into a GPR. */
3892 legitimate_reload_constant_p (rtx op)
3894 /* Accept la(y) operands. */
3895 if (GET_CODE (op) == CONST_INT
3896 && DISP_IN_RANGE (INTVAL (op)))
3899 /* Accept l(g)hi/l(g)fi operands. */
3900 if (GET_CODE (op) == CONST_INT
3901 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3904 /* Accept lliXX operands. */
3906 && GET_CODE (op) == CONST_INT
3907 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3908 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3912 && GET_CODE (op) == CONST_INT
3913 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3914 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3917 /* Accept larl operands. */
3918 if (TARGET_CPU_ZARCH
3919 && larl_operand (op, VOIDmode))
3922 /* Accept floating-point zero operands that fit into a single GPR. */
3923 if (GET_CODE (op) == CONST_DOUBLE
3924 && s390_float_const_zero_p (op)
3925 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3928 /* Accept double-word operands that can be split. */
3929 if (GET_CODE (op) == CONST_WIDE_INT
3930 || (GET_CODE (op) == CONST_INT
3931 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3933 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3934 rtx hi = operand_subword (op, 0, 0, dword_mode);
3935 rtx lo = operand_subword (op, 1, 0, dword_mode);
3936 return legitimate_reload_constant_p (hi)
3937 && legitimate_reload_constant_p (lo);
3940 /* Everything else cannot be handled without reload. */
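/* Worked examples: 4095 is accepted as an la operand, -32768 via
   l(g)hi, and (on zarch) 0x0000123400000000 via a single llihl.
   On a 31-bit target a DImode constant is accepted only if both of
   its 32-bit halves pass these same tests.  */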
3944 /* Returns true if the constant value OP is a legitimate fp operand
3945 during and after reload.
3946 This function accepts all constants which can be loaded directly into an FPR. */
3950 legitimate_reload_fp_constant_p (rtx op)
3952 /* Accept floating-point zero operands if the load zero instruction
3953 can be used. Prior to z196 the load fp zero instruction caused a
3954 performance penalty if the result is used as a BFP number. */
3956 && GET_CODE (op) == CONST_DOUBLE
3957 && s390_float_const_zero_p (op))
3963 /* Returns true if the constant value OP is a legitimate vector operand
3964 during and after reload.
3965 This function accepts all constants which can be loaded directly into a VR. */
3969 legitimate_reload_vector_constant_p (rtx op)
3971 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3972 && (satisfies_constraint_j00 (op)
3973 || satisfies_constraint_jm1 (op)
3974 || satisfies_constraint_jKK (op)
3975 || satisfies_constraint_jxx (op)
3976 || satisfies_constraint_jyy (op)))
3982 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3983 return the class of reg to actually use. */
3986 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3988 switch (GET_CODE (op))
3990 /* Constants we cannot reload into general registers
3991 must be forced into the literal pool. */
3995 case CONST_WIDE_INT:
3996 if (reg_class_subset_p (GENERAL_REGS, rclass)
3997 && legitimate_reload_constant_p (op))
3998 return GENERAL_REGS;
3999 else if (reg_class_subset_p (ADDR_REGS, rclass)
4000 && legitimate_reload_constant_p (op))
4002 else if (reg_class_subset_p (FP_REGS, rclass)
4003 && legitimate_reload_fp_constant_p (op))
4005 else if (reg_class_subset_p (VEC_REGS, rclass)
4006 && legitimate_reload_vector_constant_p (op))
4011 /* If a symbolic constant or a PLUS is reloaded,
4012 it is most likely being used as an address, so
4013 prefer ADDR_REGS. If RCLASS is not a superset
4014 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4016 /* Symrefs cannot be pushed into the literal pool with -fPIC
4017 so we *MUST NOT* return NO_REGS for these cases
4018 (s390_cannot_force_const_mem will return true).
4020 On the other hand we MUST return NO_REGS for symrefs with
4021 invalid addend which might have been pushed to the literal
4022 pool (no -fPIC). Usually we would expect them to be
4023 handled via secondary reload but this does not happen if
4024 they are used as literal pool slot replacement in reload
4025 inheritance (see emit_input_reload_insns). */
4026 if (TARGET_CPU_ZARCH
4027 && GET_CODE (XEXP (op, 0)) == PLUS
4028 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4029 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4031 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4039 if (!legitimate_reload_constant_p (op))
4043 /* load address will be used. */
4044 if (reg_class_subset_p (ADDR_REGS, rclass))
4056 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4057 multiple of ALIGNMENT and the SYMBOL_REF being naturally aligned. */
4061 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4063 HOST_WIDE_INT addend;
4066 /* The "required alignment" might be 0 (e.g. for certain structs
4067 accessed via BLKmode). Early abort in this case, as well as when
4068 an alignment > 8 is required. */
4069 if (alignment < 2 || alignment > 8)
4072 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4075 if (addend & (alignment - 1))
4078 if (GET_CODE (symref) == SYMBOL_REF)
4080 /* We have load-relative instructions for 2-byte, 4-byte, and
4081 8-byte alignment so allow only these. */
4084 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4085 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4086 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4087 default: return false;
4091 if (GET_CODE (symref) == UNSPEC
4092 && alignment <= UNITS_PER_LONG)
4098 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4099 operand SCRATCH is used to reload the even part of the address and the odd part is added afterwards. */
4103 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4105 HOST_WIDE_INT addend;
4108 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4112 /* Easy case. The addend is even so larl will do fine. */
4113 emit_move_insn (reg, addr);
4116 /* We can leave the scratch register untouched if the target
4117 register is a valid base register. */
4118 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4119 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4122 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4123 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4126 emit_move_insn (scratch,
4127 gen_rtx_CONST (Pmode,
4128 gen_rtx_PLUS (Pmode, symref,
4129 GEN_INT (addend - 1))));
4131 emit_move_insn (scratch, symref);
4133 /* Increment the address using la in order to avoid clobbering cc. */
4134 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
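/* E.g. for ADDR = sym + 5 this emits
     larl %scratch, sym+4
     la   %reg, 1(%scratch)
   which keeps the larl addend even and, unlike an add, leaves the
   condition code untouched.  */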
4138 /* Generate what is necessary to move between REG and MEM using
4139 SCRATCH. The direction is given by TOMEM. */
4142 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4144 /* Reload might have pulled a constant out of the literal pool.
4145 Force it back in. */
4146 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4147 || GET_CODE (mem) == CONST_WIDE_INT
4148 || GET_CODE (mem) == CONST_VECTOR
4149 || GET_CODE (mem) == CONST)
4150 mem = force_const_mem (GET_MODE (reg), mem);
4152 gcc_assert (MEM_P (mem));
4154 /* For a load from memory we can leave the scratch register
4155 untouched if the target register is a valid base register. */
4157 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4158 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4159 && GET_MODE (reg) == GET_MODE (scratch))
4162 /* Load address into scratch register. Since we can't have a
4163 secondary reload for a secondary reload we have to cover the case
4164 where larl would need a secondary reload here as well. */
4165 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4167 /* Now we can use a standard load/store to do the move. */
4169 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4171 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4174 /* Inform reload about cases where moving X with a mode MODE to a register in
4175 RCLASS requires an extra scratch or immediate register. Return the class
4176 needed for the immediate register. */
4179 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4180 machine_mode mode, secondary_reload_info *sri)
4182 enum reg_class rclass = (enum reg_class) rclass_i;
4184 /* Intermediate register needed. */
4185 if (reg_classes_intersect_p (CC_REGS, rclass))
4186 return GENERAL_REGS;
4190 /* The vst/vl vector move instructions only allow short displacements. */
4193 && GET_CODE (XEXP (x, 0)) == PLUS
4194 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4195 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4196 && reg_class_subset_p (rclass, VEC_REGS)
4197 && (!reg_class_subset_p (rclass, FP_REGS)
4198 || (GET_MODE_SIZE (mode) > 8
4199 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4202 sri->icode = (TARGET_64BIT ?
4203 CODE_FOR_reloaddi_la_in :
4204 CODE_FOR_reloadsi_la_in);
4206 sri->icode = (TARGET_64BIT ?
4207 CODE_FOR_reloaddi_la_out :
4208 CODE_FOR_reloadsi_la_out);
4214 HOST_WIDE_INT offset;
4217 /* On z10 several optimizer steps may generate larl operands with an odd addend. */
4220 && s390_loadrelative_operand_p (x, &symref, &offset)
4222 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4223 && (offset & 1) == 1)
4224 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4225 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4227 /* Handle all the (mem (symref)) accesses we cannot use the z10
4228 instructions for. */
4230 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4232 || !reg_class_subset_p (rclass, GENERAL_REGS)
4233 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4234 || !s390_check_symref_alignment (XEXP (x, 0),
4235 GET_MODE_SIZE (mode))))
4237 #define __SECONDARY_RELOAD_CASE(M,m) \
4240 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4241 CODE_FOR_reload##m##di_tomem_z10; \
4243 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4244 CODE_FOR_reload##m##si_tomem_z10; \
4247 switch (GET_MODE (x))
4249 __SECONDARY_RELOAD_CASE (QI, qi);
4250 __SECONDARY_RELOAD_CASE (HI, hi);
4251 __SECONDARY_RELOAD_CASE (SI, si);
4252 __SECONDARY_RELOAD_CASE (DI, di);
4253 __SECONDARY_RELOAD_CASE (TI, ti);
4254 __SECONDARY_RELOAD_CASE (SF, sf);
4255 __SECONDARY_RELOAD_CASE (DF, df);
4256 __SECONDARY_RELOAD_CASE (TF, tf);
4257 __SECONDARY_RELOAD_CASE (SD, sd);
4258 __SECONDARY_RELOAD_CASE (DD, dd);
4259 __SECONDARY_RELOAD_CASE (TD, td);
4260 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4261 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4262 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4263 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4264 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4265 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4266 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4267 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4268 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4269 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4270 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4271 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4272 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4273 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4274 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4275 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4276 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4277 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4278 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4279 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4280 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4284 #undef __SECONDARY_RELOAD_CASE
4288 /* We need a scratch register when loading a PLUS expression which
4289 is not a legitimate operand of the LOAD ADDRESS instruction. */
4290 /* LRA handles the transformation of a PLUS operand very well, so
4291 we don't need to prompt LRA in this case. */
4292 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4293 sri->icode = (TARGET_64BIT ?
4294 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4296 /* When performing a multiword move from or to memory we have to make
4297 sure the second chunk in memory is addressable without causing a
4298 displacement overflow. If it would overflow, we calculate the
4299 address in a scratch register. */
4301 && GET_CODE (XEXP (x, 0)) == PLUS
4302 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4303 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4304 + GET_MODE_SIZE (mode) - 1))
4306 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4307 in an s_operand address, since we may fall back to lm/stm. So we only
4308 have to care about overflows in the b+i+d case. */
4309 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4310 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4311 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4312 /* For FP_REGS no lm/stm is available so this check is triggered
4313 for displacement overflows in b+i+d and b+d like addresses. */
4314 || (reg_classes_intersect_p (FP_REGS, rclass)
4315 && s390_class_max_nregs (FP_REGS, mode) > 1))
4318 sri->icode = (TARGET_64BIT ?
4319 CODE_FOR_reloaddi_la_in :
4320 CODE_FOR_reloadsi_la_in);
4322 sri->icode = (TARGET_64BIT ?
4323 CODE_FOR_reloaddi_la_out :
4324 CODE_FOR_reloadsi_la_out);
4328 /* A scratch address register is needed when a symbolic constant is
4329 copied to r0 while compiling with -fPIC. In other cases the target
4330 register might be used as temporary (see legitimize_pic_address). */
4331 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4332 sri->icode = (TARGET_64BIT ?
4333 CODE_FOR_reloaddi_PIC_addr :
4334 CODE_FOR_reloadsi_PIC_addr);
4336 /* Either scratch or no register needed. */
4340 /* Generate code to load SRC, which is PLUS that is not a
4341 legitimate operand for the LA instruction, into TARGET.
4342 SCRATCH may be used as scratch register. */
4345 s390_expand_plus_operand (rtx target, rtx src,
4349 struct s390_address ad;
4351 /* src must be a PLUS; get its two operands. */
4352 gcc_assert (GET_CODE (src) == PLUS);
4353 gcc_assert (GET_MODE (src) == Pmode);
4355 /* Check if any of the two operands is already scheduled
4356 for replacement by reload. This can happen e.g. when
4357 float registers occur in an address. */
4358 sum1 = find_replacement (&XEXP (src, 0));
4359 sum2 = find_replacement (&XEXP (src, 1));
4360 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4362 /* If the address is already strictly valid, there's nothing to do. */
4363 if (!s390_decompose_address (src, &ad)
4364 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4365 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4367 /* Otherwise, one of the operands cannot be an address register;
4368 we reload its value into the scratch register. */
4369 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4371 emit_move_insn (scratch, sum1);
4374 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4376 emit_move_insn (scratch, sum2);
4380 /* According to the way these invalid addresses are generated
4381 in reload.c, it should never happen (at least on s390) that
4382 *neither* of the PLUS components, after find_replacements
4383 was applied, is an address register. */
4384 if (sum1 == scratch && sum2 == scratch)
4390 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4393 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4394 is only ever performed on addresses, so we can mark the
4395 sum as legitimate for LA in any case. */
4396 s390_load_address (target, src);
4400 /* Return true if ADDR is a valid memory address.
4401 STRICT specifies whether strict register checking applies. */
4404 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4406 struct s390_address ad;
4409 && larl_operand (addr, VOIDmode)
4410 && (mode == VOIDmode
4411 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4414 if (!s390_decompose_address (addr, &ad))
4419 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4422 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4428 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4429 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4433 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4434 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4440 /* Return true if OP is a valid operand for the LA instruction.
4441 In 31-bit, we need to prove that the result is used as an
4442 address, as LA performs only a 31-bit addition. */
4445 legitimate_la_operand_p (rtx op)
4447 struct s390_address addr;
4448 if (!s390_decompose_address (op, &addr))
4451 return (TARGET_64BIT || addr.pointer);
4454 /* Return true if it is valid *and* preferable to use LA to
4455 compute the sum of OP1 and OP2. */
4458 preferred_la_operand_p (rtx op1, rtx op2)
4460 struct s390_address addr;
4462 if (op2 != const0_rtx)
4463 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4465 if (!s390_decompose_address (op1, &addr))
4467 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4469 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4472 /* Avoid LA instructions with index register on z196; it is
4473 preferable to use regular add instructions when possible.
4474 Starting with zEC12 the la with index register is "uncracked" again. */
4476 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4479 if (!TARGET_64BIT && !addr.pointer)
4485 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4486 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4492 /* Emit a forced load-address operation to load SRC into DST.
4493 This will use the LOAD ADDRESS instruction even in situations
4494 where legitimate_la_operand_p (SRC) returns false. */
4497 s390_load_address (rtx dst, rtx src)
4500 emit_move_insn (dst, src);
4502 emit_insn (gen_force_la_31 (dst, src));
4505 /* Return a legitimate reference for ORIG (an address) using the
4506 register REG. If REG is 0, a new pseudo is generated.
4508 There are two types of references that must be handled:
4510 1. Global data references must load the address from the GOT, via
4511 the PIC reg. An insn is emitted to do this load, and the reg is
4514 2. Static data references, constant pool addresses, and code labels
4515 compute the address as an offset from the GOT, whose base is in
4516 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4517 differentiate them from global data objects. The returned
4518 address is the PIC reg + an unspec constant.
4520 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4521 reg also appears in the address. */
4524 legitimize_pic_address (rtx orig, rtx reg)
4527 rtx addend = const0_rtx;
4530 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4532 if (GET_CODE (addr) == CONST)
4533 addr = XEXP (addr, 0);
4535 if (GET_CODE (addr) == PLUS)
4537 addend = XEXP (addr, 1);
4538 addr = XEXP (addr, 0);
4541 if ((GET_CODE (addr) == LABEL_REF
4542 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4543 || (GET_CODE (addr) == UNSPEC &&
4544 (XINT (addr, 1) == UNSPEC_GOTENT
4545 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4546 && GET_CODE (addend) == CONST_INT)
4548 /* This can be locally addressed. */
4550 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4551 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4552 gen_rtx_CONST (Pmode, addr) : addr);
4554 if (TARGET_CPU_ZARCH
4555 && larl_operand (const_addr, VOIDmode)
4556 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4557 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4559 if (INTVAL (addend) & 1)
4561 /* LARL can't handle odd offsets, so emit a pair of LARL and LA. */
4563 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4565 if (!DISP_IN_RANGE (INTVAL (addend)))
4567 HOST_WIDE_INT even = INTVAL (addend) - 1;
4568 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4569 addr = gen_rtx_CONST (Pmode, addr);
4570 addend = const1_rtx;
4573 emit_move_insn (temp, addr);
4574 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4578 s390_load_address (reg, new_rtx);
4584 /* If the offset is even, we can just use LARL. This
4585 will happen automatically. */
4590 /* No larl - Access local symbols relative to the GOT. */
4592 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4594 if (reload_in_progress || reload_completed)
4595 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4597 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4598 if (addend != const0_rtx)
4599 addr = gen_rtx_PLUS (Pmode, addr, addend);
4600 addr = gen_rtx_CONST (Pmode, addr);
4601 addr = force_const_mem (Pmode, addr);
4602 emit_move_insn (temp, addr);
4604 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4607 s390_load_address (reg, new_rtx);
4612 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4614 /* A non-local symbol reference without addend.
4616 The symbol ref is wrapped into an UNSPEC to make sure the
4617 proper operand modifier (@GOT or @GOTENT) will be emitted.
4618 This will tell the linker to put the symbol into the GOT.
4620 Additionally the code dereferencing the GOT slot is emitted here.
4622 An addend to the symref needs to be added afterwards.
4623 legitimize_pic_address calls itself recursively to handle
4624 that case. So no need to do it here. */
4627 reg = gen_reg_rtx (Pmode);
4631 /* Use load relative if possible.
4632 lgrl <target>, sym@GOTENT */
4633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4634 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4635 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4637 emit_move_insn (reg, new_rtx);
4640 else if (flag_pic == 1)
4642 /* Assume GOT offset is a valid displacement operand (< 4k
4643 or < 512k with z990). This is handled the same way in
4644 both 31- and 64-bit code (@GOT).
4645 lg <target>, sym@GOT(r12) */
4647 if (reload_in_progress || reload_completed)
4648 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4650 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4651 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4652 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4653 new_rtx = gen_const_mem (Pmode, new_rtx);
4654 emit_move_insn (reg, new_rtx);
4657 else if (TARGET_CPU_ZARCH)
4659 /* If the GOT offset might be >= 4k, we determine the position
4660 of the GOT entry via a PC-relative LARL (@GOTENT).
4661 larl temp, sym@GOTENT
4662 lg <target>, 0(temp) */
4664 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4666 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4667 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4669 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4670 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4671 emit_move_insn (temp, new_rtx);
4673 new_rtx = gen_const_mem (Pmode, temp);
4674 emit_move_insn (reg, new_rtx);
4680 /* If the GOT offset might be >= 4k, we have to load it
4681 from the literal pool (@GOT).
4683 lg temp, lit-litbase(r13)
4684 lg <target>, 0(temp)
4685 lit: .long sym@GOT */
4687 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4689 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4690 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4692 if (reload_in_progress || reload_completed)
4693 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4695 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4696 addr = gen_rtx_CONST (Pmode, addr);
4697 addr = force_const_mem (Pmode, addr);
4698 emit_move_insn (temp, addr);
4700 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4701 new_rtx = gen_const_mem (Pmode, new_rtx);
4702 emit_move_insn (reg, new_rtx);
4706 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4708 gcc_assert (XVECLEN (addr, 0) == 1);
4709 switch (XINT (addr, 1))
4711 /* These unspecs address symbols (or PLT slots) relative to the GOT
4712 (not GOT slots!). In general this will exceed the displacement
4713 range, so these values belong in the literal pool. */
4717 new_rtx = force_const_mem (Pmode, orig);
4720 /* For -fPIC the GOT size might exceed the displacement
4721 range so make sure the value is in the literal pool. */
4724 new_rtx = force_const_mem (Pmode, orig);
4727 /* For @GOTENT larl is used. This is handled like local symbol refs. */
4733 /* @PLT is OK as-is on 64-bit; it must be converted to
4734 GOT-relative @PLTOFF on 31-bit. */
4736 if (!TARGET_CPU_ZARCH)
4738 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4740 if (reload_in_progress || reload_completed)
4741 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4743 addr = XVECEXP (addr, 0, 0);
4744 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4746 if (addend != const0_rtx)
4747 addr = gen_rtx_PLUS (Pmode, addr, addend);
4748 addr = gen_rtx_CONST (Pmode, addr);
4749 addr = force_const_mem (Pmode, addr);
4750 emit_move_insn (temp, addr);
4752 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4755 s390_load_address (reg, new_rtx);
4760 /* On 64 bit larl can be used. This case is handled like
4761 local symbol refs. */
4765 /* Everything else cannot happen. */
4770 else if (addend != const0_rtx)
4772 /* Otherwise, compute the sum. */
4774 rtx base = legitimize_pic_address (addr, reg);
4775 new_rtx = legitimize_pic_address (addend,
4776 base == reg ? NULL_RTX : reg);
4777 if (GET_CODE (new_rtx) == CONST_INT)
4778 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4781 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4783 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4784 new_rtx = XEXP (new_rtx, 1);
4786 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4789 if (GET_CODE (new_rtx) == CONST)
4790 new_rtx = XEXP (new_rtx, 0);
4791 new_rtx = force_operand (new_rtx, 0);
4797 /* Load the thread pointer into a register. */
4800 s390_get_thread_pointer (void)
4802 rtx tp = gen_reg_rtx (Pmode);
4804 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4805 mark_reg_pointer (tp, BITS_PER_WORD);
4810 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4811 in s390_tls_symbol which always refers to __tls_get_offset.
4812 The returned offset is written to RESULT_REG and a USE rtx is
4813 generated for TLS_CALL. */
4815 static GTY(()) rtx s390_tls_symbol;
4818 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4823 emit_insn (s390_load_got ());
4825 if (!s390_tls_symbol)
4826 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4828 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4829 gen_rtx_REG (Pmode, RETURN_REGNUM));
4831 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4832 RTL_CONST_CALL_P (insn) = 1;
4835 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4836 this (thread-local) address. REG may be used as a temporary. */
4839 legitimize_tls_address (rtx addr, rtx reg)
4841 rtx new_rtx, tls_call, temp, base, r2, insn;
4843 if (GET_CODE (addr) == SYMBOL_REF)
4844 switch (tls_symbolic_operand (addr))
4846 case TLS_MODEL_GLOBAL_DYNAMIC:
4848 r2 = gen_rtx_REG (Pmode, 2);
4849 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4850 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4851 new_rtx = force_const_mem (Pmode, new_rtx);
4852 emit_move_insn (r2, new_rtx);
4853 s390_emit_tls_call_insn (r2, tls_call);
4854 insn = get_insns ();
4857 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4858 temp = gen_reg_rtx (Pmode);
4859 emit_libcall_block (insn, temp, r2, new_rtx);
4861 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4864 s390_load_address (reg, new_rtx);
4869 case TLS_MODEL_LOCAL_DYNAMIC:
4871 r2 = gen_rtx_REG (Pmode, 2);
4872 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4873 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4874 new_rtx = force_const_mem (Pmode, new_rtx);
4875 emit_move_insn (r2, new_rtx);
4876 s390_emit_tls_call_insn (r2, tls_call);
4877 insn = get_insns ();
4880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4881 temp = gen_reg_rtx (Pmode);
4882 emit_libcall_block (insn, temp, r2, new_rtx);
4884 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4885 base = gen_reg_rtx (Pmode);
4886 s390_load_address (base, new_rtx);
4888 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4889 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4890 new_rtx = force_const_mem (Pmode, new_rtx);
4891 temp = gen_reg_rtx (Pmode);
4892 emit_move_insn (temp, new_rtx);
4894 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4897 s390_load_address (reg, new_rtx);
4902 case TLS_MODEL_INITIAL_EXEC:
4905 /* Assume GOT offset < 4k. This is handled the same way
4906 in both 31- and 64-bit code. */
4908 if (reload_in_progress || reload_completed)
4909 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4911 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4912 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4913 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4914 new_rtx = gen_const_mem (Pmode, new_rtx);
4915 temp = gen_reg_rtx (Pmode);
4916 emit_move_insn (temp, new_rtx);
4918 else if (TARGET_CPU_ZARCH)
4920 /* If the GOT offset might be >= 4k, we determine the position
4921 of the GOT entry via a PC-relative LARL. */
4923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4924 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4925 temp = gen_reg_rtx (Pmode);
4926 emit_move_insn (temp, new_rtx);
4928 new_rtx = gen_const_mem (Pmode, temp);
4929 temp = gen_reg_rtx (Pmode);
4930 emit_move_insn (temp, new_rtx);
4934 /* If the GOT offset might be >= 4k, we have to load it
4935 from the literal pool. */
4937 if (reload_in_progress || reload_completed)
4938 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4940 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4941 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4942 new_rtx = force_const_mem (Pmode, new_rtx);
4943 temp = gen_reg_rtx (Pmode);
4944 emit_move_insn (temp, new_rtx);
4946 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4947 new_rtx = gen_const_mem (Pmode, new_rtx);
4949 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4950 temp = gen_reg_rtx (Pmode);
4951 emit_insn (gen_rtx_SET (temp, new_rtx));
4955 /* In position-dependent code, load the absolute address of
4956 the GOT entry from the literal pool. */
4958 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4959 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4960 new_rtx = force_const_mem (Pmode, new_rtx);
4961 temp = gen_reg_rtx (Pmode);
4962 emit_move_insn (temp, new_rtx);
4965 new_rtx = gen_const_mem (Pmode, new_rtx);
4966 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4967 temp = gen_reg_rtx (Pmode);
4968 emit_insn (gen_rtx_SET (temp, new_rtx));
4971 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4974 s390_load_address (reg, new_rtx);
4979 case TLS_MODEL_LOCAL_EXEC:
4980 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4981 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4982 new_rtx = force_const_mem (Pmode, new_rtx);
4983 temp = gen_reg_rtx (Pmode);
4984 emit_move_insn (temp, new_rtx);
4986 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4989 s390_load_address (reg, new_rtx);
4998 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5000 switch (XINT (XEXP (addr, 0), 1))
5002 case UNSPEC_INDNTPOFF:
5003 gcc_assert (TARGET_CPU_ZARCH);
5012 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5013 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5015 new_rtx = XEXP (XEXP (addr, 0), 0);
5016 if (GET_CODE (new_rtx) != SYMBOL_REF)
5017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5019 new_rtx = legitimize_tls_address (new_rtx, reg);
5020 new_rtx = plus_constant (Pmode, new_rtx,
5021 INTVAL (XEXP (XEXP (addr, 0), 1)));
5022 new_rtx = force_operand (new_rtx, 0);
5026 gcc_unreachable (); /* for now ... */
5031 /* Emit insns making the address in operands[1] valid for a standard
5032 move to operands[0]. operands[1] is replaced by an address which
5033 should be used instead of the former RTX to emit the move pattern. */
5037 emit_symbolic_move (rtx *operands)
5039 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5041 if (GET_CODE (operands[0]) == MEM)
5042 operands[1] = force_reg (Pmode, operands[1]);
5043 else if (TLS_SYMBOLIC_CONST (operands[1]))
5044 operands[1] = legitimize_tls_address (operands[1], temp);
5046 operands[1] = legitimize_pic_address (operands[1], temp);
5049 /* Try machine-dependent ways of modifying an illegitimate address X
5050 to be legitimate. If we find one, return the new, valid address.
5052 OLDX is the address as it was before break_out_memory_refs was called.
5053 In some cases it is useful to look at this to decide what needs to be done.
5055 MODE is the mode of the operand pointed to by X.
5057 When -fpic is used, special handling is needed for symbolic references.
5058 See comments by legitimize_pic_address for details. */
5061 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5062 machine_mode mode ATTRIBUTE_UNUSED)
5064 rtx constant_term = const0_rtx;
5066 if (TLS_SYMBOLIC_CONST (x))
5068 x = legitimize_tls_address (x, 0);
5070 if (s390_legitimate_address_p (mode, x, FALSE))
5073 else if (GET_CODE (x) == PLUS
5074 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5075 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5081 if (SYMBOLIC_CONST (x)
5082 || (GET_CODE (x) == PLUS
5083 && (SYMBOLIC_CONST (XEXP (x, 0))
5084 || SYMBOLIC_CONST (XEXP (x, 1)))))
5085 x = legitimize_pic_address (x, 0);
5087 if (s390_legitimate_address_p (mode, x, FALSE))
5091 x = eliminate_constant_term (x, &constant_term);
5093 /* Optimize loading of large displacements by splitting them
5094 into a multiple of 4K and the remainder; this allows the
5095 former to be CSE'd if possible.
5097 Don't do this if the displacement is added to a register
5098 pointing into the stack frame, as the offsets will
5099 change later anyway. */
5101 if (GET_CODE (constant_term) == CONST_INT
5102 && !TARGET_LONG_DISPLACEMENT
5103 && !DISP_IN_RANGE (INTVAL (constant_term))
5104 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5106 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5107 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5109 rtx temp = gen_reg_rtx (Pmode);
5110 rtx val = force_operand (GEN_INT (upper), temp);
5112 emit_move_insn (temp, val);
5114 x = gen_rtx_PLUS (Pmode, x, temp);
5115 constant_term = GEN_INT (lower);
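/* For example, a constant term of 0x12345 is split into
   upper = 0x12000, which is loaded into a register (and can be CSE'd),
   and lower = 0x345, which still fits the 12-bit displacement field. */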
5118 if (GET_CODE (x) == PLUS)
5120 if (GET_CODE (XEXP (x, 0)) == REG)
5122 rtx temp = gen_reg_rtx (Pmode);
5123 rtx val = force_operand (XEXP (x, 1), temp);
5125 emit_move_insn (temp, val);
5127 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5130 else if (GET_CODE (XEXP (x, 1)) == REG)
5132 rtx temp = gen_reg_rtx (Pmode);
5133 rtx val = force_operand (XEXP (x, 0), temp);
5135 emit_move_insn (temp, val);
5137 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5141 if (constant_term != const0_rtx)
5142 x = gen_rtx_PLUS (Pmode, x, constant_term);
5147 /* Try a machine-dependent way of reloading an illegitimate address AD
5148 operand. If we find one, push the reload and return the new address.
5150 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5151 and TYPE is the reload type of the current reload. */
5154 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5155 int opnum, int type)
5157 if (!optimize || TARGET_LONG_DISPLACEMENT)
5160 if (GET_CODE (ad) == PLUS)
5162 rtx tem = simplify_binary_operation (PLUS, Pmode,
5163 XEXP (ad, 0), XEXP (ad, 1));
5168 if (GET_CODE (ad) == PLUS
5169 && GET_CODE (XEXP (ad, 0)) == REG
5170 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5171 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5173 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5174 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5175 rtx cst, tem, new_rtx;
5177 cst = GEN_INT (upper);
5178 if (!legitimate_reload_constant_p (cst))
5179 cst = force_const_mem (Pmode, cst);
5181 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5182 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5184 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5185 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5186 opnum, (enum reload_type) type);
5193 /* Emit code to move LEN bytes from SRC to DST. */
5196 s390_expand_movmem (rtx dst, rtx src, rtx len)
5198 /* When tuning for z10 or higher we rely on the Glibc functions to
5199 do the right thing. Inline code is generated only for constant
5200 lengths below 64k. */
5201 if (s390_tune >= PROCESSOR_2097_Z10
5202 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5205 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5207 if (INTVAL (len) > 0)
5208 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5211 else if (TARGET_MVCLE)
5213 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5218 rtx dst_addr, src_addr, count, blocks, temp;
5219 rtx_code_label *loop_start_label = gen_label_rtx ();
5220 rtx_code_label *loop_end_label = gen_label_rtx ();
5221 rtx_code_label *end_label = gen_label_rtx ();
5224 mode = GET_MODE (len);
5225 if (mode == VOIDmode)
5228 dst_addr = gen_reg_rtx (Pmode);
5229 src_addr = gen_reg_rtx (Pmode);
5230 count = gen_reg_rtx (mode);
5231 blocks = gen_reg_rtx (mode);
5233 convert_move (count, len, 1);
5234 emit_cmp_and_jump_insns (count, const0_rtx,
5235 EQ, NULL_RTX, mode, 1, end_label);
5237 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5238 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5239 dst = change_address (dst, VOIDmode, dst_addr);
5240 src = change_address (src, VOIDmode, src_addr);
5242 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5245 emit_move_insn (count, temp);
5247 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5250 emit_move_insn (blocks, temp);
5252 emit_cmp_and_jump_insns (blocks, const0_rtx,
5253 EQ, NULL_RTX, mode, 1, loop_end_label);
5255 emit_label (loop_start_label);
5258 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5262 /* Issue a read prefetch for the +3 cache line. */
5263 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5264 const0_rtx, const0_rtx);
5265 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5266 emit_insn (prefetch);
5268 /* Issue a write prefetch for the +3 cache line. */
5269 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5270 const1_rtx, const0_rtx);
5271 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5272 emit_insn (prefetch);
5275 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5276 s390_load_address (dst_addr,
5277 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5278 s390_load_address (src_addr,
5279 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5281 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5284 emit_move_insn (blocks, temp);
5286 emit_cmp_and_jump_insns (blocks, const0_rtx,
5287 EQ, NULL_RTX, mode, 1, loop_end_label);
5289 emit_jump (loop_start_label);
5290 emit_label (loop_end_label);
5292 emit_insn (gen_movmem_short (dst, src,
5293 convert_to_mode (Pmode, count, 1)));
5294 emit_label (end_label);
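/* Illustrative sketch (for exposition only, register names are
   hypothetical): the loop built above copies one 256-byte block per
   iteration with MVC and handles the remainder afterwards, roughly

   loop:
     mvc   0(256,<dst>),0(<src>)   ; copy one 256-byte block
     la    <dst>,256(<dst>)        ; advance both addresses
     la    <src>,256(<src>)
     <decrement blocks; branch to loop while blocks != 0>
     <final movmem_short copies the remaining count % 256 bytes>  */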
5299 /* Emit code to set LEN bytes at DST to VAL.
5300 Make use of clrmem if VAL is zero. */
5303 s390_expand_setmem (rtx dst, rtx len, rtx val)
5305 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5308 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5310 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5312 if (val == const0_rtx && INTVAL (len) <= 256)
5313 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5316 /* Initialize memory by storing the first byte. */
5317 emit_move_insn (adjust_address (dst, QImode, 0), val);
5319 if (INTVAL (len) > 1)
5321 /* Initiate a 1-byte overlap move.
5322 The first byte of DST is propagated through DSTP1.
5323 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5324 DST is set to size 1 so the rest of the memory location
5325 does not count as a source operand. */
5326 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5327 set_mem_size (dst, 1);
5329 emit_insn (gen_movmem_short (dstp1, dst,
5330 GEN_INT (INTVAL (len) - 2)));
5335 else if (TARGET_MVCLE)
5337 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5339 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5342 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5348 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5349 rtx_code_label *loop_start_label = gen_label_rtx ();
5350 rtx_code_label *loop_end_label = gen_label_rtx ();
5351 rtx_code_label *end_label = gen_label_rtx ();
5354 mode = GET_MODE (len);
5355 if (mode == VOIDmode)
5358 dst_addr = gen_reg_rtx (Pmode);
5359 count = gen_reg_rtx (mode);
5360 blocks = gen_reg_rtx (mode);
5362 convert_move (count, len, 1);
5363 emit_cmp_and_jump_insns (count, const0_rtx,
5364 EQ, NULL_RTX, mode, 1, end_label);
5366 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5367 dst = change_address (dst, VOIDmode, dst_addr);
5369 if (val == const0_rtx)
5370 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5374 dstp1 = adjust_address (dst, VOIDmode, 1);
5375 set_mem_size (dst, 1);
5377 /* Initialize memory by storing the first byte. */
5378 emit_move_insn (adjust_address (dst, QImode, 0), val);
5380 /* If count is 1 we are done. */
5381 emit_cmp_and_jump_insns (count, const1_rtx,
5382 EQ, NULL_RTX, mode, 1, end_label);
5384 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5388 emit_move_insn (count, temp);
5390 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5393 emit_move_insn (blocks, temp);
5395 emit_cmp_and_jump_insns (blocks, const0_rtx,
5396 EQ, NULL_RTX, mode, 1, loop_end_label);
5398 emit_label (loop_start_label);
5401 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5403 /* Issue a write prefetch for the +4 cache line. */
5404 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5406 const1_rtx, const0_rtx);
5407 emit_insn (prefetch);
5408 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5411 if (val == const0_rtx)
5412 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5414 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5415 s390_load_address (dst_addr,
5416 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5418 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5421 emit_move_insn (blocks, temp);
5423 emit_cmp_and_jump_insns (blocks, const0_rtx,
5424 EQ, NULL_RTX, mode, 1, loop_end_label);
5426 emit_jump (loop_start_label);
5427 emit_label (loop_end_label);
5429 if (val == const0_rtx)
5430 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5432 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5433 emit_label (end_label);
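/* Illustrative sketch: the 1-byte-overlap trick used above relies on
   MVC processing bytes strictly left to right.  After VAL is stored into
   the first byte, an overlapping copy shifted by one propagates it over
   the whole block of n bytes:

     mvi   0(<dst>),<val>            ; dst[0] = VAL
     mvc   1(n-1,<dst>),0(<dst>)     ; dst[i+1] = dst[i], i = 0 .. n-2  */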
5437 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5438 and return the result in TARGET. */
5441 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5443 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5446 /* When tuning for z10 or higher we rely on the Glibc functions to
5447 do the right thing. Inline code is generated only for constant
5448 lengths below 64k. */
5449 if (s390_tune >= PROCESSOR_2097_Z10
5450 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5453 /* As the result of CMPINT is inverted compared to what we need,
5454 we have to swap the operands. */
5455 tmp = op0; op0 = op1; op1 = tmp;
5457 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5459 if (INTVAL (len) > 0)
5461 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5462 emit_insn (gen_cmpint (target, ccreg));
5465 emit_move_insn (target, const0_rtx);
5467 else if (TARGET_MVCLE)
5469 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5470 emit_insn (gen_cmpint (target, ccreg));
5474 rtx addr0, addr1, count, blocks, temp;
5475 rtx_code_label *loop_start_label = gen_label_rtx ();
5476 rtx_code_label *loop_end_label = gen_label_rtx ();
5477 rtx_code_label *end_label = gen_label_rtx ();
5480 mode = GET_MODE (len);
5481 if (mode == VOIDmode)
5484 addr0 = gen_reg_rtx (Pmode);
5485 addr1 = gen_reg_rtx (Pmode);
5486 count = gen_reg_rtx (mode);
5487 blocks = gen_reg_rtx (mode);
5489 convert_move (count, len, 1);
5490 emit_cmp_and_jump_insns (count, const0_rtx,
5491 EQ, NULL_RTX, mode, 1, end_label);
5493 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5494 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5495 op0 = change_address (op0, VOIDmode, addr0);
5496 op1 = change_address (op1, VOIDmode, addr1);
5498 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5501 emit_move_insn (count, temp);
5503 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5506 emit_move_insn (blocks, temp);
5508 emit_cmp_and_jump_insns (blocks, const0_rtx,
5509 EQ, NULL_RTX, mode, 1, loop_end_label);
5511 emit_label (loop_start_label);
5514 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5518 /* Issue a read prefetch for the +2 cache line of operand 1. */
5519 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5520 const0_rtx, const0_rtx);
5521 emit_insn (prefetch);
5522 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5524 /* Issue a read prefetch for the +2 cache line of operand 2. */
5525 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5526 const0_rtx, const0_rtx);
5527 emit_insn (prefetch);
5528 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5531 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5532 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5533 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5534 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5535 temp = gen_rtx_SET (pc_rtx, temp);
5536 emit_jump_insn (temp);
5538 s390_load_address (addr0,
5539 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5540 s390_load_address (addr1,
5541 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5543 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5546 emit_move_insn (blocks, temp);
5548 emit_cmp_and_jump_insns (blocks, const0_rtx,
5549 EQ, NULL_RTX, mode, 1, loop_end_label);
5551 emit_jump (loop_start_label);
5552 emit_label (loop_end_label);
5554 emit_insn (gen_cmpmem_short (op0, op1,
5555 convert_to_mode (Pmode, count, 1)));
5556 emit_label (end_label);
5558 emit_insn (gen_cmpint (target, ccreg));
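/* Illustrative note: the CLC emitted by cmpmem_short sets the condition
   code to 0/1/2 for equal/low/high, and the cmpint pattern converts that
   CC into the -1/0/1 integer result expected by memcmp.  Since that
   conversion yields the sign for the swapped comparison, OP0 and OP1
   were exchanged at the top of this function. */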
5563 /* Emit a conditional jump to LABEL for condition code mask MASK using
5564 comparison operator COMPARISON. Return the emitted jump insn. */
5567 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5571 gcc_assert (comparison == EQ || comparison == NE);
5572 gcc_assert (mask > 0 && mask < 15);
5574 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5575 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5576 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5577 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5578 temp = gen_rtx_SET (pc_rtx, temp);
5579 return emit_jump_insn (temp);
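/* For example, s390_emit_ccraw_jump (8, NE, label) emits a jump of the
   form

     (set (pc) (if_then_else (ne (reg:CCRAW 33) (const_int 8))
                             (label_ref label)
                             (pc)))

   i.e. a conditional branch on the raw condition-code mask 8 (assuming
   CC_REGNUM is hard register 33, as on this target). */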
5582 /* Emit the instructions to implement strlen of STRING and store the
5583 result in TARGET. The string has the known ALIGNMENT. This
5584 version uses vector instructions and is therefore not appropriate
5585 for targets prior to z13. */
5588 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5590 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5591 int very_likely = REG_BR_PROB_BASE - 1;
5592 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5593 rtx str_reg = gen_reg_rtx (V16QImode);
5594 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5595 rtx str_idx_reg = gen_reg_rtx (Pmode);
5596 rtx result_reg = gen_reg_rtx (V16QImode);
5597 rtx is_aligned_label = gen_label_rtx ();
5598 rtx into_loop_label = NULL_RTX;
5599 rtx loop_start_label = gen_label_rtx ();
5601 rtx len = gen_reg_rtx (QImode);
5604 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5605 emit_move_insn (str_idx_reg, const0_rtx);
5607 if (INTVAL (alignment) < 16)
5609 /* Check whether the address happens to be aligned properly so
5610 that we can jump directly to the aligned loop. */
5611 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5612 str_addr_base_reg, GEN_INT (15)),
5613 const0_rtx, EQ, NULL_RTX,
5614 Pmode, 1, is_aligned_label);
5616 temp = gen_reg_rtx (Pmode);
5617 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5618 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5619 gcc_assert (REG_P (temp));
5620 highest_index_to_load_reg =
5621 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5622 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5623 gcc_assert (REG_P (highest_index_to_load_reg));
5624 emit_insn (gen_vllv16qi (str_reg,
5625 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5626 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5628 into_loop_label = gen_label_rtx ();
5629 s390_emit_jump (into_loop_label, NULL_RTX);
5633 emit_label (is_aligned_label);
5634 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5636 /* Reaching this point we are only performing 16-byte aligned loads. */
5638 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5640 emit_label (loop_start_label);
5641 LABEL_NUSES (loop_start_label) = 1;
5643 /* Load 16 bytes of the string into VR. */
5644 emit_move_insn (str_reg,
5645 gen_rtx_MEM (V16QImode,
5646 gen_rtx_PLUS (Pmode, str_idx_reg,
5647 str_addr_base_reg)));
5648 if (into_loop_label != NULL_RTX)
5650 emit_label (into_loop_label);
5651 LABEL_NUSES (into_loop_label) = 1;
5654 /* Increment string index by 16 bytes. */
5655 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5656 str_idx_reg, 1, OPTAB_DIRECT);
5658 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5659 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5661 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5662 REG_BR_PROB, very_likely);
5663 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5665 /* If the string pointer wasn't aligned we have loaded less than 16
5666 bytes and the remaining bytes got filled with zeros (by vll).
5667 Now we have to check whether the resulting index lies within the
5668 bytes actually part of the string. */
5670 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5671 highest_index_to_load_reg);
5672 s390_load_address (highest_index_to_load_reg,
5673 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5676 emit_insn (gen_movdicc (str_idx_reg, cond,
5677 highest_index_to_load_reg, str_idx_reg));
5679 emit_insn (gen_movsicc (str_idx_reg, cond,
5680 highest_index_to_load_reg, str_idx_reg));
5682 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5685 expand_binop (Pmode, add_optab, str_idx_reg,
5686 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5687 /* FIXME: len is already zero extended - so avoid the llgcr emitted by the conversion below. */
5689 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5690 convert_to_mode (Pmode, len, 1),
5691 target, 1, OPTAB_DIRECT);
5693 emit_move_insn (target, temp);
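/* Illustrative sketch of the aligned search loop built above, with
   hypothetical register choices:

   loop:
     vl       %v16,0(%r1,%r2)    ; load the next 16 string bytes
     <r1 += 16>
     vfenezbs %v17,%v16,%v16     ; search the block for a zero byte
     <branch back to loop while no zero byte was found>
     vlgvb    %r3,%v17,7         ; byte index of the zero byte (or 16)  */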
5697 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5699 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5700 rtx temp = gen_reg_rtx (Pmode);
5701 rtx src_addr = XEXP (src, 0);
5702 rtx dst_addr = XEXP (dst, 0);
5703 rtx src_addr_reg = gen_reg_rtx (Pmode);
5704 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5705 rtx offset = gen_reg_rtx (Pmode);
5706 rtx vsrc = gen_reg_rtx (V16QImode);
5707 rtx vpos = gen_reg_rtx (V16QImode);
5708 rtx loadlen = gen_reg_rtx (SImode);
5709 rtx gpos_qi = gen_reg_rtx (QImode);
5710 rtx gpos = gen_reg_rtx (SImode);
5711 rtx done_label = gen_label_rtx ();
5712 rtx loop_label = gen_label_rtx ();
5713 rtx exit_label = gen_label_rtx ();
5714 rtx full_label = gen_label_rtx ();
5716 /* Perform a quick check for the string ending within the first
5717 (up to) 16 bytes and exit early if successful. */
5719 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5720 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5721 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5722 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5723 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5724 /* gpos is the byte index if a zero was found and 16 otherwise.
5725 So if it is lower than the number of loaded bytes we have a hit. */
5726 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5728 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5730 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5732 emit_jump (exit_label);
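/* Note on the quick check above: VLBB with block-boundary code 6 loads
   only up to the next 4 KB boundary (at most 16 bytes), so the probe
   cannot fault on an unmapped page; LCBB computes how many bytes that
   load actually covered. */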
5735 emit_label (full_label);
5736 LABEL_NUSES (full_label) = 1;
5738 /* Calculate `offset' so that src + offset points to the last byte
5739 before 16 byte alignment. */
5741 /* temp = src_addr & 0xf */
5742 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5745 /* offset = 0xf - temp */
5746 emit_move_insn (offset, GEN_INT (15));
5747 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5750 /* Store `offset' bytes in the destination string. The quick check
5751 has loaded at least `offset' bytes into vsrc. */
5753 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5755 /* Advance to the next byte to be loaded. */
5756 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5759 /* Make sure the addresses are single regs which can be used as a base. */
5761 emit_move_insn (src_addr_reg, src_addr);
5762 emit_move_insn (dst_addr_reg, dst_addr);
5766 emit_label (loop_label);
5767 LABEL_NUSES (loop_label) = 1;
5769 emit_move_insn (vsrc,
5770 gen_rtx_MEM (V16QImode,
5771 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5773 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5774 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5775 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5776 REG_BR_PROB, very_unlikely);
5778 emit_move_insn (gen_rtx_MEM (V16QImode,
5779 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5782 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5783 offset, 1, OPTAB_DIRECT);
5785 emit_jump (loop_label);
5790 /* We are done. Add the offset of the zero character to the dst_addr
5791 pointer to get the result. */
5793 emit_label (done_label);
5794 LABEL_NUSES (done_label) = 1;
5796 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5799 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5800 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5802 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5804 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5809 emit_label (exit_label);
5810 LABEL_NUSES (exit_label) = 1;
5814 /* Expand conditional increment or decrement using alc/slb instructions.
5815 Should generate code setting DST to either SRC or SRC + INCREMENT,
5816 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5817 Returns true if successful, false otherwise.
5819 That makes it possible to implement some if-constructs without jumps e.g.:
5820 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5821 unsigned int a, b, c;
5822 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5823 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5824 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5825 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5827 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5828 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5829 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5830 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5831 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5834 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5835 rtx dst, rtx src, rtx increment)
5837 machine_mode cmp_mode;
5838 machine_mode cc_mode;
5844 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5845 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5847 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5848 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5853 /* Try ADD LOGICAL WITH CARRY. */
5854 if (increment == const1_rtx)
5856 /* Determine CC mode to use. */
5857 if (cmp_code == EQ || cmp_code == NE)
5859 if (cmp_op1 != const0_rtx)
5861 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5862 NULL_RTX, 0, OPTAB_WIDEN);
5863 cmp_op1 = const0_rtx;
5866 cmp_code = cmp_code == EQ ? LEU : GTU;
5869 if (cmp_code == LTU || cmp_code == LEU)
5874 cmp_code = swap_condition (cmp_code);
5891 /* Emit comparison instruction pattern. */
5892 if (!register_operand (cmp_op0, cmp_mode))
5893 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5895 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5896 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5897 /* We use insn_invalid_p here to add clobbers if required. */
5898 ret = insn_invalid_p (emit_insn (insn), false);
5901 /* Emit ALC instruction pattern. */
5902 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5903 gen_rtx_REG (cc_mode, CC_REGNUM),
5906 if (src != const0_rtx)
5908 if (!register_operand (src, GET_MODE (dst)))
5909 src = force_reg (GET_MODE (dst), src);
5911 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5912 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5915 p = rtvec_alloc (2);
5917 gen_rtx_SET (dst, op_res);
5919 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5920 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5925 /* Try SUBTRACT LOGICAL WITH BORROW. */
5926 if (increment == constm1_rtx)
5928 /* Determine CC mode to use. */
5929 if (cmp_code == EQ || cmp_code == NE)
5931 if (cmp_op1 != const0_rtx)
5933 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5934 NULL_RTX, 0, OPTAB_WIDEN);
5935 cmp_op1 = const0_rtx;
5938 cmp_code = cmp_code == EQ ? LEU : GTU;
5941 if (cmp_code == GTU || cmp_code == GEU)
5946 cmp_code = swap_condition (cmp_code);
5963 /* Emit comparison instruction pattern. */
5964 if (!register_operand (cmp_op0, cmp_mode))
5965 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5967 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5968 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5969 /* We use insn_invalid_p here to add clobbers if required. */
5970 ret = insn_invalid_p (emit_insn (insn), false);
5973 /* Emit SLB instruction pattern. */
5974 if (!register_operand (src, GET_MODE (dst)))
5975 src = force_reg (GET_MODE (dst), src);
5977 op_res = gen_rtx_MINUS (GET_MODE (dst),
5978 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5979 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5980 gen_rtx_REG (cc_mode, CC_REGNUM),
5982 p = rtvec_alloc (2);
5984 gen_rtx_SET (dst, op_res);
5986 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5987 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
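/* Illustrative sketch: for `if (a < b) c++;' on unsigned SImode values
   (hypothetical register assignment) the ALC path above boils down to
   roughly

     clr   <b>,<a>        ; CCU compare, a < b produces the carry
     alcr  <c>,<zero>     ; c += carry

   and the SLB path analogously uses slbr to subtract the borrow in the
   decrement cases. */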
5995 /* Expand code for the insv template. Return true if successful. */
5998 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6000 int bitsize = INTVAL (op1);
6001 int bitpos = INTVAL (op2);
6002 machine_mode mode = GET_MODE (dest);
6004 int smode_bsize, mode_bsize;
6007 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6010 /* Generate INSERT IMMEDIATE (IILL et al). */
6011 /* (set (ze (reg)) (const_int)). */
6013 && register_operand (dest, word_mode)
6014 && (bitpos % 16) == 0
6015 && (bitsize % 16) == 0
6016 && const_int_operand (src, VOIDmode))
6018 HOST_WIDE_INT val = INTVAL (src);
6019 int regpos = bitpos + bitsize;
6021 while (regpos > bitpos)
6023 machine_mode putmode;
6026 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6031 putsize = GET_MODE_BITSIZE (putmode);
6033 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6036 gen_int_mode (val, putmode));
6039 gcc_assert (regpos == bitpos);
6043 smode = smallest_mode_for_size (bitsize, MODE_INT);
6044 smode_bsize = GET_MODE_BITSIZE (smode);
6045 mode_bsize = GET_MODE_BITSIZE (mode);
6047 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6049 && (bitsize % BITS_PER_UNIT) == 0
6051 && (register_operand (src, word_mode)
6052 || const_int_operand (src, VOIDmode)))
6054 /* Emit standard pattern if possible. */
6055 if (smode_bsize == bitsize)
6057 emit_move_insn (adjust_address (dest, smode, 0),
6058 gen_lowpart (smode, src));
6062 /* (set (ze (mem)) (const_int)). */
6063 else if (const_int_operand (src, VOIDmode))
6065 int size = bitsize / BITS_PER_UNIT;
6066 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6068 UNITS_PER_WORD - size);
6070 dest = adjust_address (dest, BLKmode, 0);
6071 set_mem_size (dest, size);
6072 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6076 /* (set (ze (mem)) (reg)). */
6077 else if (register_operand (src, word_mode))
6080 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6084 /* Emit st,stcmh sequence. */
6085 int stcmh_width = bitsize - 32;
6086 int size = stcmh_width / BITS_PER_UNIT;
6088 emit_move_insn (adjust_address (dest, SImode, size),
6089 gen_lowpart (SImode, src));
6090 set_mem_size (dest, size);
6091 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6092 GEN_INT (stcmh_width),
6094 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6100 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6101 if ((bitpos % BITS_PER_UNIT) == 0
6102 && (bitsize % BITS_PER_UNIT) == 0
6103 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6105 && (mode == DImode || mode == SImode)
6106 && register_operand (dest, mode))
6108 /* Emit a strict_low_part pattern if possible. */
6109 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6111 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6112 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6113 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6114 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6118 /* ??? There are more powerful versions of ICM that are not
6119 completely represented in the md file. */
6122 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6123 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6125 machine_mode mode_s = GET_MODE (src);
6127 if (CONSTANT_P (src))
6129 /* For constant zero values the representation with AND
6130 appears to be folded in more situations than the (set
6131 (zero_extract) ...).
6132 We only do this when the start and end of the bitfield
6133 remain in the same SImode chunk. That way nihf or nilf can be used.
6135 The AND patterns might still generate a risbg for this. */
6136 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6139 src = force_reg (mode, src);
6141 else if (mode_s != mode)
6143 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6144 src = force_reg (mode_s, src);
6145 src = gen_lowpart (mode, src);
6148 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6149 op = gen_rtx_SET (op, src);
6153 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6154 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6164 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6165 register that holds VAL of mode MODE shifted by COUNT bits. */
6168 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6170 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6171 NULL_RTX, 1, OPTAB_DIRECT);
6172 return expand_simple_binop (SImode, ASHIFT, val, count,
6173 NULL_RTX, 1, OPTAB_DIRECT);
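/* For example, for a QImode VAL and a COUNT of 24 this returns a
   register holding (VAL & 0xff) << 24, i.e. VAL placed in the leftmost
   byte of an SImode word. */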
6176 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6177 the result in TARGET. */
6180 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6181 rtx cmp_op1, rtx cmp_op2)
6183 machine_mode mode = GET_MODE (target);
6184 bool neg_p = false, swap_p = false;
6187 if (GET_MODE (cmp_op1) == V2DFmode)
6191 /* NE a != b -> !(a == b) */
6192 case NE: cond = EQ; neg_p = true; break;
6193 /* UNGT a u> b -> !(b >= a) */
6194 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6195 /* UNGE a u>= b -> !(b > a) */
6196 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6197 /* LE: a <= b -> b >= a */
6198 case LE: cond = GE; swap_p = true; break;
6199 /* UNLE: a u<= b -> !(a > b) */
6200 case UNLE: cond = GT; neg_p = true; break;
6201 /* LT: a < b -> b > a */
6202 case LT: cond = GT; swap_p = true; break;
6203 /* UNLT: a u< b -> !(a >= b) */
6204 case UNLT: cond = GE; neg_p = true; break;
6206 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6209 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6212 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6215 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6224 /* NE: a != b -> !(a == b) */
6225 case NE: cond = EQ; neg_p = true; break;
6226 /* GE: a >= b -> !(b > a) */
6227 case GE: cond = GT; neg_p = true; swap_p = true; break;
6228 /* GEU: a >= b -> !(b > a) */
6229 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6230 /* LE: a <= b -> !(a > b) */
6231 case LE: cond = GT; neg_p = true; break;
6232 /* LEU: a <= b -> !(a > b) */
6233 case LEU: cond = GTU; neg_p = true; break;
6234 /* LT: a < b -> b > a */
6235 case LT: cond = GT; swap_p = true; break;
6236 /* LTU: a < b -> b > a */
6237 case LTU: cond = GTU; swap_p = true; break;
6244 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6247 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6249 cmp_op1, cmp_op2)));
6251 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
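/* For example, a V4SImode `a != b', which has no direct instruction, is
   emitted as the supported equality compare followed by a negation of
   the resulting mask:

     (set (target) (eq:V4SI a b))
     (set (target) (not:V4SI target))  */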
6254 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6255 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6256 elements in CMP1 and CMP2 fulfill the comparison. */
6258 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6259 rtx cmp1, rtx cmp2, bool all_p)
6261 enum rtx_code new_code = code;
6262 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
6263 rtx tmp_reg = gen_reg_rtx (SImode);
6264 bool swap_p = false;
6266 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6270 case EQ: cmp_mode = CCVEQmode; break;
6271 case NE: cmp_mode = CCVEQmode; break;
6272 case GT: cmp_mode = CCVHmode; break;
6273 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
6274 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
6275 case LE: cmp_mode = CCVHmode; new_code = LE; break;
6276 case GTU: cmp_mode = CCVHUmode; break;
6277 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
6278 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
6279 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6280 default: gcc_unreachable ();
6282 scratch_mode = GET_MODE (cmp1);
6284 else if (GET_MODE (cmp1) == V2DFmode)
6288 case EQ: cmp_mode = CCVEQmode; break;
6289 case NE: cmp_mode = CCVEQmode; break;
6290 case GT: cmp_mode = CCVFHmode; break;
6291 case GE: cmp_mode = CCVFHEmode; break;
6292 case UNLE: cmp_mode = CCVFHmode; break;
6293 case UNLT: cmp_mode = CCVFHEmode; break;
6294 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6295 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6296 default: gcc_unreachable ();
6298 scratch_mode = V2DImode;
6306 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6307 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6308 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6309 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6310 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6311 default: gcc_unreachable ();
6314 /* The modes without ANY match the ALL modes. */
6315 full_cmp_mode = cmp_mode;
6324 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6325 gen_rtvec (2, gen_rtx_SET (
6326 gen_rtx_REG (cmp_mode, CC_REGNUM),
6327 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6328 gen_rtx_CLOBBER (VOIDmode,
6329 gen_rtx_SCRATCH (scratch_mode)))));
6330 emit_move_insn (target, const0_rtx);
6331 emit_move_insn (tmp_reg, const1_rtx);
6333 emit_move_insn (target,
6334 gen_rtx_IF_THEN_ELSE (SImode,
6335 gen_rtx_fmt_ee (new_code, VOIDmode,
6336 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6341 /* Generate a vector comparison expression loading either elements of
6342 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6346 s390_expand_vcond (rtx target, rtx then, rtx els,
6347 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6350 machine_mode result_mode;
6353 machine_mode target_mode = GET_MODE (target);
6354 machine_mode cmp_mode = GET_MODE (cmp_op1);
6355 rtx op = (cond == LT) ? els : then;
6357 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6358 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6359 for short and byte (x >> 15 and x >> 7 respectively). */
6360 if ((cond == LT || cond == GE)
6361 && target_mode == cmp_mode
6362 && cmp_op2 == CONST0_RTX (cmp_mode)
6363 && op == CONST0_RTX (target_mode)
6364 && s390_vector_mode_supported_p (target_mode)
6365 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6367 rtx negop = (cond == LT) ? then : els;
6369 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6371 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6372 if (negop == CONST1_RTX (target_mode))
6374 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6375 GEN_INT (shift), target,
6378 emit_move_insn (target, res);
6382 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6383 else if (all_ones_operand (negop, target_mode))
6385 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6386 GEN_INT (shift), target,
6389 emit_move_insn (target, res);
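/* For example, a V4SImode `x < 0 ? -1 : 0' is emitted as an arithmetic
   element shift right by 31 (vesraf in assembly terms), and
   `x < 0 ? 1 : 0' as the corresponding logical shift (vesrlf). */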
6394 /* We always use an integral type vector to hold the comparison result. */
6396 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6397 result_target = gen_reg_rtx (result_mode);
6399 /* We allow vector immediates as comparison operands that
6400 can be handled by the optimization above but not by the
6401 following code. Hence, force them into registers here. */
6402 if (!REG_P (cmp_op1))
6403 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6405 if (!REG_P (cmp_op2))
6406 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6408 s390_expand_vec_compare (result_target, cond,
6411 /* If the results are supposed to be either -1 or 0 we are done
6412 since this is what our compare instructions generate anyway. */
6413 if (all_ones_operand (then, GET_MODE (then))
6414 && const0_operand (els, GET_MODE (els)))
6416 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6421 /* Otherwise we will do a vsel afterwards. */
6422 /* This gets triggered e.g.
6423 with gcc.c-torture/compile/pr53410-1.c */
6425 then = force_reg (target_mode, then);
6428 els = force_reg (target_mode, els);
6430 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6432 CONST0_RTX (result_mode));
6434 /* We compared the result against zero above so we have to swap then and els here. */
6436 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6438 gcc_assert (target_mode == GET_MODE (then));
6439 emit_insn (gen_rtx_SET (target, tmp));
6442 /* Emit the RTX necessary to initialize the vector TARGET with values in VALS. */
6445 s390_expand_vec_init (rtx target, rtx vals)
6447 machine_mode mode = GET_MODE (target);
6448 machine_mode inner_mode = GET_MODE_INNER (mode);
6449 int n_elts = GET_MODE_NUNITS (mode);
6450 bool all_same = true, all_regs = true, all_const_int = true;
6454 for (i = 0; i < n_elts; ++i)
6456 x = XVECEXP (vals, 0, i);
6458 if (!CONST_INT_P (x))
6459 all_const_int = false;
6461 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6468 /* Use vector gen mask or vector gen byte mask if possible. */
6469 if (all_same && all_const_int
6470 && (XVECEXP (vals, 0, 0) == const0_rtx
6471 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6473 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6475 emit_insn (gen_rtx_SET (target,
6476 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6482 emit_insn (gen_rtx_SET (target,
6483 gen_rtx_VEC_DUPLICATE (mode,
6484 XVECEXP (vals, 0, 0))));
6488 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6490 /* Use vector load pair. */
6491 emit_insn (gen_rtx_SET (target,
6492 gen_rtx_VEC_CONCAT (mode,
6493 XVECEXP (vals, 0, 0),
6494 XVECEXP (vals, 0, 1))));
6498 /* We are about to set the vector elements one by one. Zero out the
6499 full register first in order to help the data flow framework to
6500 detect it as a full VR set. */
6501 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6503 /* Unfortunately the vec_init expander is not allowed to fail. So
6504 we have to implement the fallback ourselves. */
6505 for (i = 0; i < n_elts; i++)
6507 rtx elem = XVECEXP (vals, 0, i);
6508 if (!general_operand (elem, GET_MODE (elem)))
6509 elem = force_reg (inner_mode, elem);
6511 emit_insn (gen_rtx_SET (target,
6512 gen_rtx_UNSPEC (mode,
6514 GEN_INT (i), target),
6519 /* Structure to hold the initial parameters for a compare_and_swap operation
6520 in HImode and QImode. */
6522 struct alignment_context
6524 rtx memsi; /* SI aligned memory location. */
6525 rtx shift; /* Bit offset with regard to lsb. */
6526 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6527 rtx modemaski; /* ~modemask */
6528 bool aligned; /* True if memory is aligned, false otherwise. */
6531 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6532 structure AC for transparent simplifying, if the memory alignment is known
6533 to be at least 32bit. MEM is the memory location for the actual operation
6534 and MODE its mode. */
6537 init_alignment_context (struct alignment_context *ac, rtx mem,
6540 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6541 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6544 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6547 /* Alignment is unknown. */
6548 rtx byteoffset, addr, align;
6550 /* Force the address into a register. */
6551 addr = force_reg (Pmode, XEXP (mem, 0));
6553 /* Align it to SImode. */
6554 align = expand_simple_binop (Pmode, AND, addr,
6555 GEN_INT (-GET_MODE_SIZE (SImode)),
6556 NULL_RTX, 1, OPTAB_DIRECT);
6558 ac->memsi = gen_rtx_MEM (SImode, align);
6559 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6560 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6561 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6563 /* Calculate shiftcount. */
6564 byteoffset = expand_simple_binop (Pmode, AND, addr,
6565 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6566 NULL_RTX, 1, OPTAB_DIRECT);
6567 /* As we already have some offset, evaluate the remaining distance. */
6568 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6569 NULL_RTX, 1, OPTAB_DIRECT);
6572 /* Shift is the byte count, but we need the bitcount. */
6573 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6574 NULL_RTX, 1, OPTAB_DIRECT);
6576 /* Calculate masks. */
6577 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6578 GEN_INT (GET_MODE_MASK (mode)),
6579 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6580 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
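/* Worked example (address assumed for illustration): for a QImode MEM
   at an address A with A % 4 == 1, memsi covers the word at A & ~3.
   The initial shift is 4 - 1 = 3 bytes and the byte offset is 1, so
   shift becomes (3 - 1) * 8 = 16 bits, modemask = 0xff << 16 and
   modemaski its complement - matching the big-endian position of the
   byte within the word. */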
6584 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6585 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6586 perform the merge in SEQ2. */
6589 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6590 machine_mode mode, rtx val, rtx ins)
6597 tmp = copy_to_mode_reg (SImode, val);
6598 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6602 *seq2 = get_insns ();
6609 /* Failed to use insv. Generate a two part shift and mask. */
6611 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6612 *seq1 = get_insns ();
6616 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6617 *seq2 = get_insns ();
6623 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6624 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6625 value to set if CMP == MEM. */
6628 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6629 rtx cmp, rtx new_rtx, bool is_weak)
6631 struct alignment_context ac;
6632 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6633 rtx res = gen_reg_rtx (SImode);
6634 rtx_code_label *csloop = NULL, *csend = NULL;
6636 gcc_assert (MEM_P (mem));
6638 init_alignment_context (&ac, mem, mode);
6640 /* Load full word. Subsequent loads are performed by CS. */
6641 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6642 NULL_RTX, 1, OPTAB_DIRECT);
6644 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6645 possible, we try to use insv to make this happen efficiently. If
6646 that fails we'll generate code both inside and outside the loop. */
6647 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6648 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6655 /* Start CS loop. */
6658 /* Begin assuming success. */
6659 emit_move_insn (btarget, const1_rtx);
6661 csloop = gen_label_rtx ();
6662 csend = gen_label_rtx ();
6663 emit_label (csloop);
6666 /* val = "<mem>00..0<mem>"
6667 * cmp = "00..0<cmp>00..0"
6668 * new = "00..0<new>00..0"
6674 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6676 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6681 /* Jump to end if we're done (likely?). */
6682 s390_emit_jump (csend, cc);
6684 /* Check for changes outside mode, and loop internally if so.
6685 Arrange the moves so that the compare is adjacent to the
6686 branch so that we can generate CRJ. */
6687 tmp = copy_to_reg (val);
6688 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6690 cc = s390_emit_compare (NE, val, tmp);
6691 s390_emit_jump (csloop, cc);
6694 emit_move_insn (btarget, const0_rtx);
6698 /* Return the correct part of the bitfield. */
6699 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6700 NULL_RTX, 1, OPTAB_DIRECT), 1);
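/* Illustrative C-like shape of the strong variant built above (names
   are hypothetical): the sub-word value is embedded in an aligned
   SImode word and CS operates on the full word:

     expected = (word & modemaski) | (cmp << shift);
     desired  = (word & modemaski) | (new << shift);
     observed = CS (memsi, expected, desired);
     // if the swap failed only because bytes outside MODE changed
     // concurrently, rebuild expected/desired from `observed' and retry.  */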
6703 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6704 and VAL the value to play with. If AFTER is true then store the value
6705 MEM holds after the operation, if AFTER is false then store the value MEM
6706 holds before the operation. If TARGET is zero then discard that value, else
6707 store it to TARGET. */
6710 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6711 rtx target, rtx mem, rtx val, bool after)
6713 struct alignment_context ac;
6715 rtx new_rtx = gen_reg_rtx (SImode);
6716 rtx orig = gen_reg_rtx (SImode);
6717 rtx_code_label *csloop = gen_label_rtx ();
6719 gcc_assert (!target || register_operand (target, VOIDmode));
6720 gcc_assert (MEM_P (mem));
6722 init_alignment_context (&ac, mem, mode);
6724 /* Shift val to the correct bit positions.
6725 Preserve "icm", but prevent "ex icm". */
6726 if (!(ac.aligned && code == SET && MEM_P (val)))
6727 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6729 /* Further preparation insns. */
6730 if (code == PLUS || code == MINUS)
6731 emit_move_insn (orig, val);
6732 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6733 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6734 NULL_RTX, 1, OPTAB_DIRECT);
6736 /* Load full word. Subsequent loads are performed by CS. */
6737 cmp = force_reg (SImode, ac.memsi);
6739 /* Start CS loop. */
6740 emit_label (csloop);
6741 emit_move_insn (new_rtx, cmp);
6743 /* Patch new with val at correct position. */
6748 val = expand_simple_binop (SImode, code, new_rtx, orig,
6749 NULL_RTX, 1, OPTAB_DIRECT);
6750 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6751 NULL_RTX, 1, OPTAB_DIRECT);
6754 if (ac.aligned && MEM_P (val))
6755 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6756 0, 0, SImode, val, false);
6759 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6760 NULL_RTX, 1, OPTAB_DIRECT);
6761 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6762 NULL_RTX, 1, OPTAB_DIRECT);
6768 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6769 NULL_RTX, 1, OPTAB_DIRECT);
6771 case MULT: /* NAND */
6772 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6773 NULL_RTX, 1, OPTAB_DIRECT);
6774 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6775 NULL_RTX, 1, OPTAB_DIRECT);
6781 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6782 ac.memsi, cmp, new_rtx));
6784 /* Return the correct part of the bitfield. */
6786 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6787 after ? new_rtx : cmp, ac.shift,
6788 NULL_RTX, 1, OPTAB_DIRECT), 1);
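/* Illustrative C-level equivalent of the loop built above for a PLUS
   (names are hypothetical; `orig' holds VAL already shifted into
   position within the aligned word W):

     old = W;
     do
       new = (old & modemaski) | ((old + orig) & modemask);
     while (!compare_and_swap (&W, &old, new));  */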
6791 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6792 We need to emit DTP-relative relocations. */
6794 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6797 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6802 fputs ("\t.long\t", file);
6805 fputs ("\t.quad\t", file);
6810 output_addr_const (file, x);
6811 fputs ("@DTPOFF", file);
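/* For example, for a 64-bit thread-local symbol `foo' this function
   prints

     .quad foo@DTPOFF

   which the assembler resolves to a DTP-relative relocation. */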
6814 /* Return the proper mode for REGNO being represented in the dwarf unwind table. */
6817 s390_dwarf_frame_reg_mode (int regno)
6819 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6821 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6822 if (GENERAL_REGNO_P (regno))
6825 /* The rightmost 64 bits of vector registers are call-clobbered. */
6826 if (GET_MODE_SIZE (save_mode) > 8)
6832 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6833 /* Implement TARGET_MANGLE_TYPE. */
6836 s390_mangle_type (const_tree type)
6838 type = TYPE_MAIN_VARIANT (type);
6840 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6841 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6844 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6845 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6846 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6847 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6849 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6850 && TARGET_LONG_DOUBLE_128)
6853 /* For all other types, use normal C++ mangling. */
6858 /* In the name of slightly smaller debug output, and to cater to
6859 general assembler lossage, recognize various UNSPEC sequences
6860 and turn them back into a direct symbol reference. */
6863 s390_delegitimize_address (rtx orig_x)
6867 orig_x = delegitimize_mem_from_attrs (orig_x);
6870 /* Extract the symbol ref from:
6871 (plus:SI (reg:SI 12 %r12)
6872 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6873 UNSPEC_GOTOFF/PLTOFF)))
6875 (plus:SI (reg:SI 12 %r12)
6876 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6877 UNSPEC_GOTOFF/PLTOFF)
6878 (const_int 4 [0x4])))) */
6879 if (GET_CODE (x) == PLUS
6880 && REG_P (XEXP (x, 0))
6881 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6882 && GET_CODE (XEXP (x, 1)) == CONST)
6884 HOST_WIDE_INT offset = 0;
6886 /* The const operand. */
6887 y = XEXP (XEXP (x, 1), 0);
6889 if (GET_CODE (y) == PLUS
6890 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6892 offset = INTVAL (XEXP (y, 1));
6896 if (GET_CODE (y) == UNSPEC
6897 && (XINT (y, 1) == UNSPEC_GOTOFF
6898 || XINT (y, 1) == UNSPEC_PLTOFF))
6899 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6902 if (GET_CODE (x) != MEM)
6906 if (GET_CODE (x) == PLUS
6907 && GET_CODE (XEXP (x, 1)) == CONST
6908 && GET_CODE (XEXP (x, 0)) == REG
6909 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6911 y = XEXP (XEXP (x, 1), 0);
6912 if (GET_CODE (y) == UNSPEC
6913 && XINT (y, 1) == UNSPEC_GOT)
6914 y = XVECEXP (y, 0, 0);
6918 else if (GET_CODE (x) == CONST)
6920 /* Extract the symbol ref from:
6921 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6922 UNSPEC_PLT/GOTENT))) */
6925 if (GET_CODE (y) == UNSPEC
6926 && (XINT (y, 1) == UNSPEC_GOTENT
6927 || XINT (y, 1) == UNSPEC_PLT))
6928 y = XVECEXP (y, 0, 0);
6935 if (GET_MODE (orig_x) != Pmode)
6937 if (GET_MODE (orig_x) == BLKmode)
6939 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6946 /* Output operand OP to stdio stream FILE.
6947 OP is an address (register + offset) which is not used to address data;
6948 instead the rightmost bits are interpreted as the value. */
6951 print_addrstyle_operand (FILE *file, rtx op)
6953 HOST_WIDE_INT offset;
6956 /* Extract base register and offset. */
6957 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
6963 gcc_assert (GET_CODE (base) == REG);
6964 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6965 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
/* Offsets are restricted to twelve bits.  */
6969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6971 fprintf (file, "(%s)", reg_names[REGNO (base)]);
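/* Editor's illustration: for OP == (plus (reg %r3) (const_int 7)) this
   prints "7(%r3)", e.g. as the shift count of a shift instruction.  */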
/* Assign the number of NOP halfwords to be emitted before and after the
   function label to *HW_BEFORE and *HW_AFTER.  Neither pointer may be NULL.
   If hotpatching is disabled for the function, both values are set to
   zero.  */
6980 s390_function_num_hotpatch_hw (tree decl,
6986 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
/* Handle the arguments of the hotpatch attribute.  The values
   specified via attribute might override the cmdline argument
   values.  */
6993 tree args = TREE_VALUE (attr);
6995 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6996 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7000 /* Use the values specified by the cmdline arguments. */
7001 *hw_before = s390_hotpatch_hw_before_label;
7002 *hw_after = s390_hotpatch_hw_after_label;
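/* The attribute handled above is used like this (an illustrative example,
   not taken from this file):

     void f (void) __attribute__ ((hotpatch (1, 2)));

   requesting one NOP halfword before and two after the label of f,
   overriding any -mhotpatch= command line setting.  */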
/* Write the current .machine and .machinemode specification to the
   assembler file.  */
7009 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7011 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7013 fprintf (asm_out_file, "\t.machinemode %s\n",
7014 (TARGET_ZARCH) ? "zarch" : "esa");
7015 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7016 if (S390_USE_ARCHITECTURE_MODIFIERS)
7020 cpu_flags = processor_flags_table[(int) s390_arch];
7021 if (TARGET_HTM && !(cpu_flags & PF_TX))
7022 fprintf (asm_out_file, "+htm");
7023 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7024 fprintf (asm_out_file, "+nohtm");
7025 if (TARGET_VX && !(cpu_flags & PF_VX))
7026 fprintf (asm_out_file, "+vx");
7027 else if (!TARGET_VX && (cpu_flags & PF_VX))
7028 fprintf (asm_out_file, "+novx");
7030 fprintf (asm_out_file, "\"\n");
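/* For instance (an illustrative sketch, assuming -march=z13 -mzarch with
   transactional execution disabled), the emitted directives could be:

	.machinemode zarch
	.machine "z13+nohtm"  */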
7033 /* Write an extra function header before the very start of the function. */
7036 s390_asm_output_function_prefix (FILE *asm_out_file,
7037 const char *fnname ATTRIBUTE_UNUSED)
7039 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
/* Since only the function specific options are saved, and not an indication
   of which options were explicitly set, it is too much work here to figure
   out which options have actually changed.  Thus, generate .machine and
   .machinemode whenever a function has the target attribute or pragma.  */
7045 fprintf (asm_out_file, "\t.machinemode push\n");
7046 fprintf (asm_out_file, "\t.machine push\n");
7047 s390_asm_output_machine_for_arch (asm_out_file);
7050 /* Write an extra function footer after the very end of the function. */
7053 s390_asm_declare_function_size (FILE *asm_out_file,
7054 const char *fnname, tree decl)
7056 if (!flag_inhibit_size_directive)
7057 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7058 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7060 fprintf (asm_out_file, "\t.machine pop\n");
7061 fprintf (asm_out_file, "\t.machinemode pop\n");
7065 /* Write the extra assembler code needed to declare a function properly. */
7068 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7071 int hw_before, hw_after;
7073 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7076 unsigned int function_alignment;
7079 /* Add a trampoline code area before the function label and initialize it
7080 with two-byte nop instructions. This area can be overwritten with code
7081 that jumps to a patched version of the function. */
7082 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
7083 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7085 for (i = 1; i < hw_before; i++)
7086 fputs ("\tnopr\t%r7\n", asm_out_file);
/* Note: The function label must be aligned so that (a) the bytes of the
   following nop do not cross a cacheline boundary, and (b) a jump address
   (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
   stored directly before the label without crossing a cacheline
   boundary.  All this is necessary to make sure the trampoline code can
   be changed atomically.
   This alignment is done automatically using the FUNCTION_BOUNDARY, but
   if there are NOPs before the function label, the alignment is placed
   before them.  So it is necessary to duplicate the alignment after the
   NOPs.  */
7098 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7099 if (! DECL_USER_ALIGN (decl))
7100 function_alignment = MAX (function_alignment,
7101 (unsigned int) align_functions);
7102 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7103 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7106 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7108 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7109 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7110 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7111 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7112 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7113 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7114 s390_warn_framesize);
7115 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7116 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7117 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7118 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7119 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7120 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7121 TARGET_PACKED_STACK);
7122 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7123 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7124 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7125 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7126 s390_warn_dynamicstack_p);
7128 ASM_OUTPUT_LABEL (asm_out_file, fname);
7130 asm_fprintf (asm_out_file,
7131 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7135 /* Output machine-dependent UNSPECs occurring in address constant X
7136 in assembler syntax to stdio stream FILE. Returns true if the
7137 constant X could be recognized, false otherwise. */
7140 s390_output_addr_const_extra (FILE *file, rtx x)
7142 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7143 switch (XINT (x, 1))
7146 output_addr_const (file, XVECEXP (x, 0, 0));
7147 fprintf (file, "@GOTENT");
7150 output_addr_const (file, XVECEXP (x, 0, 0));
7151 fprintf (file, "@GOT");
7154 output_addr_const (file, XVECEXP (x, 0, 0));
7155 fprintf (file, "@GOTOFF");
7158 output_addr_const (file, XVECEXP (x, 0, 0));
7159 fprintf (file, "@PLT");
7162 output_addr_const (file, XVECEXP (x, 0, 0));
7163 fprintf (file, "@PLTOFF");
7166 output_addr_const (file, XVECEXP (x, 0, 0));
7167 fprintf (file, "@TLSGD");
7170 assemble_name (file, get_some_local_dynamic_name ());
7171 fprintf (file, "@TLSLDM");
7174 output_addr_const (file, XVECEXP (x, 0, 0));
7175 fprintf (file, "@DTPOFF");
7178 output_addr_const (file, XVECEXP (x, 0, 0));
7179 fprintf (file, "@NTPOFF");
7181 case UNSPEC_GOTNTPOFF:
7182 output_addr_const (file, XVECEXP (x, 0, 0));
7183 fprintf (file, "@GOTNTPOFF");
7185 case UNSPEC_INDNTPOFF:
7186 output_addr_const (file, XVECEXP (x, 0, 0));
7187 fprintf (file, "@INDNTPOFF");
7191 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7192 switch (XINT (x, 1))
7194 case UNSPEC_POOL_OFFSET:
7195 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7196 output_addr_const (file, x);
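/* For example, an UNSPEC_GOTENT wrapping (symbol_ref "foo") is printed as
   "foo@GOTENT", while UNSPEC_POOL_OFFSET prints as the difference between
   the entry and the pool base label (editor's illustration).  */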
7202 /* Output address operand ADDR in assembler syntax to
7203 stdio stream FILE. */
7206 print_operand_address (FILE *file, rtx addr)
7208 struct s390_address ad;
7210 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7214 output_operand_lossage ("symbolic memory references are "
7215 "only supported on z10 or later");
7218 output_addr_const (file, addr);
7222 if (!s390_decompose_address (addr, &ad)
7223 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7224 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7225 output_operand_lossage ("cannot decompose address");
7228 output_addr_const (file, ad.disp);
7230 fprintf (file, "0");
7232 if (ad.base && ad.indx)
7233 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7234 reg_names[REGNO (ad.base)]);
7236 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
/* Output operand X in assembler syntax to stdio stream FILE.
   CODE specifies the format flag.  The following format flags
   are recognized:

   'C': print opcode suffix for branch condition.
   'D': print opcode suffix for inverse branch condition.
   'E': print opcode suffix for branch on index instruction.
   'G': print the size of the operand in bytes.
   'J': print tls_load/tls_gdcall/tls_ldcall suffix.
   'M': print the second word of a TImode operand.
   'N': print the second word of a DImode operand.
   'O': print only the displacement of a memory reference or address.
   'R': print only the base register of a memory reference or address.
   'S': print S-type memory reference (base+displacement).
   'Y': print address style operand without index (e.g. shift count or setmem
	operand).

   'b': print integer X as if it's an unsigned byte.
   'c': print integer X as if it's a signed byte.
   'e': "end" contiguous bitmask X in either DImode or vector inner mode.
   'f': "end" contiguous bitmask X in SImode.
   'h': print integer X as if it's a signed halfword.
   'i': print the first nonzero HImode part of X.
   'j': print the first HImode part unequal to -1 of X.
   'k': print the first nonzero SImode part of X.
   'm': print the first SImode part unequal to -1 of X.
   'o': print integer X as if it's an unsigned 32-bit word.
   's': "start" of contiguous bitmask X in either DImode or vector inner mode.
   't': CONST_INT: "start" of contiguous bitmask X in SImode.
	CONST_VECTOR: Generate a bitmask for vgbm instruction.
   'x': print integer X as if it's an unsigned halfword.
   'v': print register number as vector register (v1 instead of f1).  */
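/* As an illustration (editor's note; a hypothetical template, the real ones
   live in s390.md): an insn template such as

     "lm\t%0,%N0,%S1"

   prints operand 0 and its second word as a register pair and operand 1 as
   an S-type base+displacement memory reference.  */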
7274 print_operand (FILE *file, rtx x, int code)
7281 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7285 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7289 if (GET_CODE (x) == LE)
7290 fprintf (file, "l");
7291 else if (GET_CODE (x) == GT)
7292 fprintf (file, "h");
7294 output_operand_lossage ("invalid comparison operator "
7295 "for 'E' output modifier");
7299 if (GET_CODE (x) == SYMBOL_REF)
7301 fprintf (file, "%s", ":tls_load:");
7302 output_addr_const (file, x);
7304 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7306 fprintf (file, "%s", ":tls_gdcall:");
7307 output_addr_const (file, XVECEXP (x, 0, 0));
7309 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7311 fprintf (file, "%s", ":tls_ldcall:");
7312 const char *name = get_some_local_dynamic_name ();
7314 assemble_name (file, name);
7317 output_operand_lossage ("invalid reference for 'J' output modifier");
7321 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7326 struct s390_address ad;
7329 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7332 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7335 output_operand_lossage ("invalid address for 'O' output modifier");
7340 output_addr_const (file, ad.disp);
7342 fprintf (file, "0");
7348 struct s390_address ad;
7351 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7354 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7357 output_operand_lossage ("invalid address for 'R' output modifier");
7362 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7364 fprintf (file, "0");
7370 struct s390_address ad;
7375 output_operand_lossage ("memory reference expected for "
7376 "'S' output modifier");
7379 ret = s390_decompose_address (XEXP (x, 0), &ad);
7382 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7385 output_operand_lossage ("invalid address for 'S' output modifier");
7390 output_addr_const (file, ad.disp);
7392 fprintf (file, "0");
7395 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7400 if (GET_CODE (x) == REG)
7401 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7402 else if (GET_CODE (x) == MEM)
7403 x = change_address (x, VOIDmode,
7404 plus_constant (Pmode, XEXP (x, 0), 4));
7406 output_operand_lossage ("register or memory expression expected "
7407 "for 'N' output modifier");
7411 if (GET_CODE (x) == REG)
7412 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7413 else if (GET_CODE (x) == MEM)
7414 x = change_address (x, VOIDmode,
7415 plus_constant (Pmode, XEXP (x, 0), 8));
7417 output_operand_lossage ("register or memory expression expected "
7418 "for 'M' output modifier");
7422 print_addrstyle_operand (file, x);
7426 switch (GET_CODE (x))
7429 /* Print FP regs as fx instead of vx when they are accessed
7430 through non-vector mode. */
7432 || VECTOR_NOFP_REG_P (x)
7433 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7434 || (VECTOR_REG_P (x)
7435 && (GET_MODE_SIZE (GET_MODE (x)) /
7436 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7437 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7439 fprintf (file, "%s", reg_names[REGNO (x)]);
7443 output_address (GET_MODE (x), XEXP (x, 0));
7450 output_addr_const (file, x);
7463 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7469 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7472 ival = s390_extract_part (x, HImode, 0);
7475 ival = s390_extract_part (x, HImode, -1);
7478 ival = s390_extract_part (x, SImode, 0);
7481 ival = s390_extract_part (x, SImode, -1);
7493 len = (code == 's' || code == 'e' ? 64 : 32);
7494 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7496 if (code == 's' || code == 't')
7503 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7505 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7508 case CONST_WIDE_INT:
7510 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7511 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7512 else if (code == 'x')
7513 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7514 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7515 else if (code == 'h')
7516 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7517 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7521 output_operand_lossage ("invalid constant - try using "
7522 "an output modifier");
7524 output_operand_lossage ("invalid constant for output modifier '%c'",
7532 gcc_assert (const_vec_duplicate_p (x));
7533 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7534 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7542 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7544 ival = (code == 's') ? start : end;
7545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7551 bool ok = s390_bytemask_vector_p (x, &mask);
7553 fprintf (file, "%u", mask);
7558 output_operand_lossage ("invalid constant vector for output "
7559 "modifier '%c'", code);
7565 output_operand_lossage ("invalid expression - try using "
7566 "an output modifier");
7568 output_operand_lossage ("invalid expression for output "
7569 "modifier '%c'", code);
7574 /* Target hook for assembling integer objects. We need to define it
here to work around a bug in some versions of GAS, which couldn't
7576 handle values smaller than INT_MIN when printed in decimal. */
7579 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7581 if (size == 8 && aligned_p
7582 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7584 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7588 return default_assemble_integer (x, size, aligned_p);
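/* Editor's illustration: the CONST_INT -0x80000001, which is just below
   INT_MIN, is emitted as

	.quad	0xffffffff7fffffff

   instead of its decimal spelling, which the affected GAS versions would
   mis-parse.  */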
7591 /* Returns true if register REGNO is used for forming
7592 a memory address in expression X. */
7595 reg_used_in_mem_p (int regno, rtx x)
7597 enum rtx_code code = GET_CODE (x);
7603 if (refers_to_regno_p (regno, XEXP (x, 0)))
7606 else if (code == SET
7607 && GET_CODE (SET_DEST (x)) == PC)
7609 if (refers_to_regno_p (regno, SET_SRC (x)))
7613 fmt = GET_RTX_FORMAT (code);
7614 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7617 && reg_used_in_mem_p (regno, XEXP (x, i)))
7620 else if (fmt[i] == 'E')
7621 for (j = 0; j < XVECLEN (x, i); j++)
7622 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7628 /* Returns true if expression DEP_RTX sets an address register
7629 used by instruction INSN to address memory. */
7632 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7636 if (NONJUMP_INSN_P (dep_rtx))
7637 dep_rtx = PATTERN (dep_rtx);
7639 if (GET_CODE (dep_rtx) == SET)
7641 target = SET_DEST (dep_rtx);
7642 if (GET_CODE (target) == STRICT_LOW_PART)
7643 target = XEXP (target, 0);
7644 while (GET_CODE (target) == SUBREG)
7645 target = SUBREG_REG (target);
7647 if (GET_CODE (target) == REG)
7649 int regno = REGNO (target);
7651 if (s390_safe_attr_type (insn) == TYPE_LA)
7653 pat = PATTERN (insn);
7654 if (GET_CODE (pat) == PARALLEL)
7656 gcc_assert (XVECLEN (pat, 0) == 2);
7657 pat = XVECEXP (pat, 0, 0);
7659 gcc_assert (GET_CODE (pat) == SET);
7660 return refers_to_regno_p (regno, SET_SRC (pat));
7662 else if (get_attr_atype (insn) == ATYPE_AGEN)
7663 return reg_used_in_mem_p (regno, PATTERN (insn));
/* Return 1 if DEP_INSN sets a register used by INSN in the agen unit.  */
7672 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7674 rtx dep_rtx = PATTERN (dep_insn);
7677 if (GET_CODE (dep_rtx) == SET
7678 && addr_generation_dependency_p (dep_rtx, insn))
7680 else if (GET_CODE (dep_rtx) == PARALLEL)
7682 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7684 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7692 /* A C statement (sans semicolon) to update the integer scheduling priority
7693 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7694 reduce the priority to execute INSN later. Do not define this macro if
7695 you do not need to adjust the scheduling priorities of insns.
7697 A STD instruction should be scheduled earlier,
7698 in order to use the bypass. */
7700 s390_adjust_priority (rtx_insn *insn, int priority)
7702 if (! INSN_P (insn))
7705 if (s390_tune <= PROCESSOR_2064_Z900)
7708 switch (s390_safe_attr_type (insn))
7712 priority = priority << 3;
7716 priority = priority << 1;
7725 /* The number of instructions that can be issued per cycle. */
7728 s390_issue_rate (void)
7732 case PROCESSOR_2084_Z990:
7733 case PROCESSOR_2094_Z9_109:
7734 case PROCESSOR_2094_Z9_EC:
7735 case PROCESSOR_2817_Z196:
7737 case PROCESSOR_2097_Z10:
7739 case PROCESSOR_9672_G5:
7740 case PROCESSOR_9672_G6:
7741 case PROCESSOR_2064_Z900:
7742 /* Starting with EC12 we use the sched_reorder hook to take care
7743 of instruction dispatch constraints. The algorithm only
7744 picks the best instruction and assumes only a single
7745 instruction gets issued per cycle. */
7746 case PROCESSOR_2827_ZEC12:
7747 case PROCESSOR_2964_Z13:
7754 s390_first_cycle_multipass_dfa_lookahead (void)
7759 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7760 Fix up MEMs as required. */
7763 annotate_constant_pool_refs (rtx *x)
7768 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7769 || !CONSTANT_POOL_ADDRESS_P (*x));
7771 /* Literal pool references can only occur inside a MEM ... */
7772 if (GET_CODE (*x) == MEM)
7774 rtx memref = XEXP (*x, 0);
7776 if (GET_CODE (memref) == SYMBOL_REF
7777 && CONSTANT_POOL_ADDRESS_P (memref))
7779 rtx base = cfun->machine->base_reg;
7780 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7783 *x = replace_equiv_address (*x, addr);
7787 if (GET_CODE (memref) == CONST
7788 && GET_CODE (XEXP (memref, 0)) == PLUS
7789 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7790 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7791 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7793 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7794 rtx sym = XEXP (XEXP (memref, 0), 0);
7795 rtx base = cfun->machine->base_reg;
7796 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7799 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7804 /* ... or a load-address type pattern. */
7805 if (GET_CODE (*x) == SET)
7807 rtx addrref = SET_SRC (*x);
7809 if (GET_CODE (addrref) == SYMBOL_REF
7810 && CONSTANT_POOL_ADDRESS_P (addrref))
7812 rtx base = cfun->machine->base_reg;
7813 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7816 SET_SRC (*x) = addr;
7820 if (GET_CODE (addrref) == CONST
7821 && GET_CODE (XEXP (addrref, 0)) == PLUS
7822 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7823 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7824 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7826 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7827 rtx sym = XEXP (XEXP (addrref, 0), 0);
7828 rtx base = cfun->machine->base_reg;
7829 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7832 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7837 /* Annotate LTREL_BASE as well. */
7838 if (GET_CODE (*x) == UNSPEC
7839 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7841 rtx base = cfun->machine->base_reg;
7842 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7847 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7848 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7852 annotate_constant_pool_refs (&XEXP (*x, i));
7854 else if (fmt[i] == 'E')
7856 for (j = 0; j < XVECLEN (*x, i); j++)
7857 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7862 /* Split all branches that exceed the maximum distance.
7863 Returns true if this created a new literal pool entry. */
7866 s390_split_branches (void)
7868 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7869 int new_literal = 0, ret;
7874 /* We need correct insn addresses. */
7876 shorten_branches (get_insns ());
7878 /* Find all branches that exceed 64KB, and split them. */
7880 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7882 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7885 pat = PATTERN (insn);
7886 if (GET_CODE (pat) == PARALLEL)
7887 pat = XVECEXP (pat, 0, 0);
7888 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7891 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7893 label = &SET_SRC (pat);
7895 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7897 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7898 label = &XEXP (SET_SRC (pat), 1);
7899 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7900 label = &XEXP (SET_SRC (pat), 2);
7907 if (get_attr_length (insn) <= 4)
/* We are going to use the return register as a scratch register,
   so make sure it will be saved/restored by the prologue/epilogue.  */
7912 cfun_frame_layout.save_return_addr_p = 1;
7917 rtx mem = force_const_mem (Pmode, *label);
7918 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7920 INSN_ADDRESSES_NEW (set_insn, -1);
7921 annotate_constant_pool_refs (&PATTERN (set_insn));
7928 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7929 UNSPEC_LTREL_OFFSET);
7930 target = gen_rtx_CONST (Pmode, target);
7931 target = force_const_mem (Pmode, target);
7932 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7934 INSN_ADDRESSES_NEW (set_insn, -1);
7935 annotate_constant_pool_refs (&PATTERN (set_insn));
7937 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7938 cfun->machine->base_reg),
7940 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7943 ret = validate_change (insn, label, target, 0);
7951 /* Find an annotated literal pool symbol referenced in RTX X,
7952 and store it at REF. Will abort if X contains references to
7953 more than one such pool symbol; multiple references to the same
7954 symbol are allowed, however.
7956 The rtx pointed to by REF must be initialized to NULL_RTX
7957 by the caller before calling this routine. */
7960 find_constant_pool_ref (rtx x, rtx *ref)
7965 /* Ignore LTREL_BASE references. */
7966 if (GET_CODE (x) == UNSPEC
7967 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7969 /* Likewise POOL_ENTRY insns. */
7970 if (GET_CODE (x) == UNSPEC_VOLATILE
7971 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7974 gcc_assert (GET_CODE (x) != SYMBOL_REF
7975 || !CONSTANT_POOL_ADDRESS_P (x));
7977 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7979 rtx sym = XVECEXP (x, 0, 0);
7980 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7981 && CONSTANT_POOL_ADDRESS_P (sym));
7983 if (*ref == NULL_RTX)
7986 gcc_assert (*ref == sym);
7991 fmt = GET_RTX_FORMAT (GET_CODE (x));
7992 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7996 find_constant_pool_ref (XEXP (x, i), ref);
7998 else if (fmt[i] == 'E')
8000 for (j = 0; j < XVECLEN (x, i); j++)
8001 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8006 /* Replace every reference to the annotated literal pool
8007 symbol REF in X by its base plus OFFSET. */
8010 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8015 gcc_assert (*x != ref);
8017 if (GET_CODE (*x) == UNSPEC
8018 && XINT (*x, 1) == UNSPEC_LTREF
8019 && XVECEXP (*x, 0, 0) == ref)
8021 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8025 if (GET_CODE (*x) == PLUS
8026 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8027 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8028 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8029 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8031 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8032 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8036 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8037 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8041 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8043 else if (fmt[i] == 'E')
8045 for (j = 0; j < XVECLEN (*x, i); j++)
8046 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8051 /* Check whether X contains an UNSPEC_LTREL_BASE.
8052 Return its constant pool symbol if found, NULL_RTX otherwise. */
8055 find_ltrel_base (rtx x)
8060 if (GET_CODE (x) == UNSPEC
8061 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8062 return XVECEXP (x, 0, 0);
8064 fmt = GET_RTX_FORMAT (GET_CODE (x));
8065 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8069 rtx fnd = find_ltrel_base (XEXP (x, i));
8073 else if (fmt[i] == 'E')
8075 for (j = 0; j < XVECLEN (x, i); j++)
8077 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8087 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8090 replace_ltrel_base (rtx *x)
8095 if (GET_CODE (*x) == UNSPEC
8096 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8098 *x = XVECEXP (*x, 0, 1);
8102 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8103 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8107 replace_ltrel_base (&XEXP (*x, i));
8109 else if (fmt[i] == 'E')
8111 for (j = 0; j < XVECLEN (*x, i); j++)
8112 replace_ltrel_base (&XVECEXP (*x, i, j));
8118 /* We keep a list of constants which we have to add to internal
8119 constant tables in the middle of large functions. */
8121 #define NR_C_MODES 32
8122 machine_mode constant_modes[NR_C_MODES] =
8124 TFmode, TImode, TDmode,
8125 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8126 V4SFmode, V2DFmode, V1TFmode,
8127 DFmode, DImode, DDmode,
8128 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8129 SFmode, SImode, SDmode,
8130 V4QImode, V2HImode, V1SImode, V1SFmode,
8139 struct constant *next;
8141 rtx_code_label *label;
8144 struct constant_pool
8146 struct constant_pool *next;
8147 rtx_insn *first_insn;
8148 rtx_insn *pool_insn;
8150 rtx_insn *emit_pool_after;
8152 struct constant *constants[NR_C_MODES];
8153 struct constant *execute;
8154 rtx_code_label *label;
8158 /* Allocate new constant_pool structure. */
8160 static struct constant_pool *
8161 s390_alloc_pool (void)
8163 struct constant_pool *pool;
8166 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8168 for (i = 0; i < NR_C_MODES; i++)
8169 pool->constants[i] = NULL;
8171 pool->execute = NULL;
8172 pool->label = gen_label_rtx ();
8173 pool->first_insn = NULL;
8174 pool->pool_insn = NULL;
8175 pool->insns = BITMAP_ALLOC (NULL);
8177 pool->emit_pool_after = NULL;
8182 /* Create new constant pool covering instructions starting at INSN
8183 and chain it to the end of POOL_LIST. */
8185 static struct constant_pool *
8186 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8188 struct constant_pool *pool, **prev;
8190 pool = s390_alloc_pool ();
8191 pool->first_insn = insn;
8193 for (prev = pool_list; *prev; prev = &(*prev)->next)
8200 /* End range of instructions covered by POOL at INSN and emit
8201 placeholder insn representing the pool. */
8204 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8206 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8209 insn = get_last_insn ();
8211 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8212 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8215 /* Add INSN to the list of insns covered by POOL. */
8218 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8220 bitmap_set_bit (pool->insns, INSN_UID (insn));
8223 /* Return pool out of POOL_LIST that covers INSN. */
8225 static struct constant_pool *
8226 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8228 struct constant_pool *pool;
8230 for (pool = pool_list; pool; pool = pool->next)
8231 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8237 /* Add constant VAL of mode MODE to the constant pool POOL. */
8240 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8245 for (i = 0; i < NR_C_MODES; i++)
8246 if (constant_modes[i] == mode)
8248 gcc_assert (i != NR_C_MODES);
8250 for (c = pool->constants[i]; c != NULL; c = c->next)
8251 if (rtx_equal_p (val, c->value))
8256 c = (struct constant *) xmalloc (sizeof *c);
8258 c->label = gen_label_rtx ();
8259 c->next = pool->constants[i];
8260 pool->constants[i] = c;
8261 pool->size += GET_MODE_SIZE (mode);
/* Return an rtx that represents the offset of X from the start of
   pool POOL.  */
8269 s390_pool_offset (struct constant_pool *pool, rtx x)
8273 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8274 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8275 UNSPEC_POOL_OFFSET);
8276 return gen_rtx_CONST (GET_MODE (x), x);
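/* The UNSPEC_POOL_OFFSET wrapper built here is later rendered by
   s390_output_addr_const_extra above as the difference between X and the
   pool base label, which the assembler can resolve directly.  */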
8279 /* Find constant VAL of mode MODE in the constant pool POOL.
8280 Return an RTX describing the distance from the start of
8281 the pool to the location of the new constant. */
8284 s390_find_constant (struct constant_pool *pool, rtx val,
8290 for (i = 0; i < NR_C_MODES; i++)
8291 if (constant_modes[i] == mode)
8293 gcc_assert (i != NR_C_MODES);
8295 for (c = pool->constants[i]; c != NULL; c = c->next)
8296 if (rtx_equal_p (val, c->value))
8301 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8304 /* Check whether INSN is an execute. Return the label_ref to its
8305 execute target template if so, NULL_RTX otherwise. */
8308 s390_execute_label (rtx insn)
8310 if (NONJUMP_INSN_P (insn)
8311 && GET_CODE (PATTERN (insn)) == PARALLEL
8312 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8313 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8314 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8319 /* Add execute target for INSN to the constant pool POOL. */
8322 s390_add_execute (struct constant_pool *pool, rtx insn)
8326 for (c = pool->execute; c != NULL; c = c->next)
8327 if (INSN_UID (insn) == INSN_UID (c->value))
8332 c = (struct constant *) xmalloc (sizeof *c);
8334 c->label = gen_label_rtx ();
8335 c->next = pool->execute;
8341 /* Find execute target for INSN in the constant pool POOL.
8342 Return an RTX describing the distance from the start of
8343 the pool to the location of the execute target. */
8346 s390_find_execute (struct constant_pool *pool, rtx insn)
8350 for (c = pool->execute; c != NULL; c = c->next)
8351 if (INSN_UID (insn) == INSN_UID (c->value))
8356 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8359 /* For an execute INSN, extract the execute target template. */
8362 s390_execute_target (rtx insn)
8364 rtx pattern = PATTERN (insn);
8365 gcc_assert (s390_execute_label (insn));
8367 if (XVECLEN (pattern, 0) == 2)
8369 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8373 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8376 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8377 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8379 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8385 /* Indicate that INSN cannot be duplicated. This is the case for
8386 execute insns that carry a unique label. */
8389 s390_cannot_copy_insn_p (rtx_insn *insn)
8391 rtx label = s390_execute_label (insn);
8392 return label && label != const0_rtx;
8395 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8396 do not emit the pool base label. */
8399 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8402 rtx_insn *insn = pool->pool_insn;
8405 /* Switch to rodata section. */
8406 if (TARGET_CPU_ZARCH)
8408 insn = emit_insn_after (gen_pool_section_start (), insn);
8409 INSN_ADDRESSES_NEW (insn, -1);
8412 /* Ensure minimum pool alignment. */
8413 if (TARGET_CPU_ZARCH)
8414 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8416 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8417 INSN_ADDRESSES_NEW (insn, -1);
8419 /* Emit pool base label. */
8422 insn = emit_label_after (pool->label, insn);
8423 INSN_ADDRESSES_NEW (insn, -1);
8426 /* Dump constants in descending alignment requirement order,
8427 ensuring proper alignment for every constant. */
8428 for (i = 0; i < NR_C_MODES; i++)
8429 for (c = pool->constants[i]; c; c = c->next)
8431 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8432 rtx value = copy_rtx (c->value);
8433 if (GET_CODE (value) == CONST
8434 && GET_CODE (XEXP (value, 0)) == UNSPEC
8435 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8436 && XVECLEN (XEXP (value, 0), 0) == 1)
8437 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8439 insn = emit_label_after (c->label, insn);
8440 INSN_ADDRESSES_NEW (insn, -1);
8442 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8443 gen_rtvec (1, value),
8444 UNSPECV_POOL_ENTRY);
8445 insn = emit_insn_after (value, insn);
8446 INSN_ADDRESSES_NEW (insn, -1);
8449 /* Ensure minimum alignment for instructions. */
8450 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8451 INSN_ADDRESSES_NEW (insn, -1);
8453 /* Output in-pool execute template insns. */
8454 for (c = pool->execute; c; c = c->next)
8456 insn = emit_label_after (c->label, insn);
8457 INSN_ADDRESSES_NEW (insn, -1);
8459 insn = emit_insn_after (s390_execute_target (c->value), insn);
8460 INSN_ADDRESSES_NEW (insn, -1);
8463 /* Switch back to previous section. */
8464 if (TARGET_CPU_ZARCH)
8466 insn = emit_insn_after (gen_pool_section_end (), insn);
8467 INSN_ADDRESSES_NEW (insn, -1);
8470 insn = emit_barrier_after (insn);
8471 INSN_ADDRESSES_NEW (insn, -1);
8473 /* Remove placeholder insn. */
8474 remove_insn (pool->pool_insn);
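/* The emitted pool thus looks roughly like this (editor's sketch for the
   zarch case):

	<switch to rodata>
	.align	8
   .Lpool:
   .Lc0:	<pool entry>	# one label + entry per constant
	...
	.align	2
   .Lex0:	<execute template insn>
	<switch back to previous section>  */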
8477 /* Free all memory used by POOL. */
8480 s390_free_pool (struct constant_pool *pool)
8482 struct constant *c, *next;
8485 for (i = 0; i < NR_C_MODES; i++)
8486 for (c = pool->constants[i]; c; c = next)
8492 for (c = pool->execute; c; c = next)
8498 BITMAP_FREE (pool->insns);
8503 /* Collect main literal pool. Return NULL on overflow. */
8505 static struct constant_pool *
8506 s390_mainpool_start (void)
8508 struct constant_pool *pool;
8511 pool = s390_alloc_pool ();
8513 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8515 if (NONJUMP_INSN_P (insn)
8516 && GET_CODE (PATTERN (insn)) == SET
8517 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8518 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8520 /* There might be two main_pool instructions if base_reg
8521 is call-clobbered; one for shrink-wrapped code and one
8522 for the rest. We want to keep the first. */
8523 if (pool->pool_insn)
8525 insn = PREV_INSN (insn);
8526 delete_insn (NEXT_INSN (insn));
8529 pool->pool_insn = insn;
8532 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8534 s390_add_execute (pool, insn);
8536 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8538 rtx pool_ref = NULL_RTX;
8539 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8542 rtx constant = get_pool_constant (pool_ref);
8543 machine_mode mode = get_pool_mode (pool_ref);
8544 s390_add_constant (pool, constant, mode);
8548 /* If hot/cold partitioning is enabled we have to make sure that
8549 the literal pool is emitted in the same section where the
8550 initialization of the literal pool base pointer takes place.
   emit_pool_after is only used in the non-overflow case on
   non-zSeries CPUs, where we can emit the literal pool at the end of
   the function body within the text section.  */
8555 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8556 && !pool->emit_pool_after)
8557 pool->emit_pool_after = PREV_INSN (insn);
8560 gcc_assert (pool->pool_insn || pool->size == 0);
8562 if (pool->size >= 4096)
8564 /* We're going to chunkify the pool, so remove the main
8565 pool placeholder insn. */
8566 remove_insn (pool->pool_insn);
8568 s390_free_pool (pool);
/* If the function ends with the section where the literal pool
   should be emitted, set the marker to its end.  */
8574 if (pool && !pool->emit_pool_after)
8575 pool->emit_pool_after = get_last_insn ();
8580 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8581 Modify the current function to output the pool constants as well as
8582 the pool register setup instruction. */
8585 s390_mainpool_finish (struct constant_pool *pool)
8587 rtx base_reg = cfun->machine->base_reg;
8589 /* If the pool is empty, we're done. */
8590 if (pool->size == 0)
8592 /* We don't actually need a base register after all. */
8593 cfun->machine->base_reg = NULL_RTX;
8595 if (pool->pool_insn)
8596 remove_insn (pool->pool_insn);
8597 s390_free_pool (pool);
8601 /* We need correct insn addresses. */
8602 shorten_branches (get_insns ());
8604 /* On zSeries, we use a LARL to load the pool register. The pool is
8605 located in the .rodata section, so we emit it after the function. */
8606 if (TARGET_CPU_ZARCH)
8608 rtx set = gen_main_base_64 (base_reg, pool->label);
8609 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8610 INSN_ADDRESSES_NEW (insn, -1);
8611 remove_insn (pool->pool_insn);
8613 insn = get_last_insn ();
8614 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8615 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8617 s390_dump_pool (pool, 0);
8620 /* On S/390, if the total size of the function's code plus literal pool
8621 does not exceed 4096 bytes, we use BASR to set up a function base
8622 pointer, and emit the literal pool at the end of the function. */
8623 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8624 + pool->size + 8 /* alignment slop */ < 4096)
8626 rtx set = gen_main_base_31_small (base_reg, pool->label);
8627 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8628 INSN_ADDRESSES_NEW (insn, -1);
8629 remove_insn (pool->pool_insn);
8631 insn = emit_label_after (pool->label, insn);
8632 INSN_ADDRESSES_NEW (insn, -1);
/* emit_pool_after will be set by s390_mainpool_start to the
   last insn of the section where the literal pool should be
   emitted.  */
8637 insn = pool->emit_pool_after;
8639 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8640 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8642 s390_dump_pool (pool, 1);
8645 /* Otherwise, we emit an inline literal pool and use BASR to branch
8646 over it, setting up the pool register at the same time. */
8649 rtx_code_label *pool_end = gen_label_rtx ();
8651 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8652 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8653 JUMP_LABEL (insn) = pool_end;
8654 INSN_ADDRESSES_NEW (insn, -1);
8655 remove_insn (pool->pool_insn);
8657 insn = emit_label_after (pool->label, insn);
8658 INSN_ADDRESSES_NEW (insn, -1);
8660 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8661 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8663 insn = emit_label_after (pool_end, pool->pool_insn);
8664 INSN_ADDRESSES_NEW (insn, -1);
8666 s390_dump_pool (pool, 1);
8670 /* Replace all literal pool references. */
8672 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8675 replace_ltrel_base (&PATTERN (insn));
8677 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8679 rtx addr, pool_ref = NULL_RTX;
8680 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8683 if (s390_execute_label (insn))
8684 addr = s390_find_execute (pool, insn);
8686 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8687 get_pool_mode (pool_ref));
8689 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8690 INSN_CODE (insn) = -1;
8696 /* Free the pool. */
8697 s390_free_pool (pool);
8700 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8701 We have decided we cannot use this pool, so revert all changes
8702 to the current function that were done by s390_mainpool_start. */
8704 s390_mainpool_cancel (struct constant_pool *pool)
8706 /* We didn't actually change the instruction stream, so simply
8707 free the pool memory. */
8708 s390_free_pool (pool);
8712 /* Chunkify the literal pool. */
8714 #define S390_POOL_CHUNK_MIN 0xc00
8715 #define S390_POOL_CHUNK_MAX 0xe00
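/* Editor's note: both bounds lie deliberately below 0x1000, the reach of a
   12-bit displacement, leaving slack for the alignment slop and for the
   base register reload insns whose size is estimated pessimistically
   below.  */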
8717 static struct constant_pool *
8718 s390_chunkify_start (void)
8720 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8723 rtx pending_ltrel = NULL_RTX;
8726 rtx (*gen_reload_base) (rtx, rtx) =
8727 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8730 /* We need correct insn addresses. */
8732 shorten_branches (get_insns ());
8734 /* Scan all insns and move literals to pool chunks. */
8736 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8738 bool section_switch_p = false;
8740 /* Check for pending LTREL_BASE. */
8743 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8746 gcc_assert (ltrel_base == pending_ltrel);
8747 pending_ltrel = NULL_RTX;
8751 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8754 curr_pool = s390_start_pool (&pool_list, insn);
8756 s390_add_execute (curr_pool, insn);
8757 s390_add_pool_insn (curr_pool, insn);
8759 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8761 rtx pool_ref = NULL_RTX;
8762 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8765 rtx constant = get_pool_constant (pool_ref);
8766 machine_mode mode = get_pool_mode (pool_ref);
8769 curr_pool = s390_start_pool (&pool_list, insn);
8771 s390_add_constant (curr_pool, constant, mode);
8772 s390_add_pool_insn (curr_pool, insn);
8774 /* Don't split the pool chunk between a LTREL_OFFSET load
8775 and the corresponding LTREL_BASE. */
8776 if (GET_CODE (constant) == CONST
8777 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8778 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8780 gcc_assert (!pending_ltrel);
8781 pending_ltrel = pool_ref;
8786 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8789 s390_add_pool_insn (curr_pool, insn);
8790 /* An LTREL_BASE must follow within the same basic block. */
8791 gcc_assert (!pending_ltrel);
8795 switch (NOTE_KIND (insn))
8797 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8798 section_switch_p = true;
8800 case NOTE_INSN_VAR_LOCATION:
8801 case NOTE_INSN_CALL_ARG_LOCATION:
8808 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8809 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8812 if (TARGET_CPU_ZARCH)
8814 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8817 s390_end_pool (curr_pool, NULL);
8822 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8823 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8826 /* We will later have to insert base register reload insns.
8827 Those will have an effect on code size, which we need to
8828 consider here. This calculation makes rather pessimistic
8829 worst-case assumptions. */
8833 if (chunk_size < S390_POOL_CHUNK_MIN
8834 && curr_pool->size < S390_POOL_CHUNK_MIN
8835 && !section_switch_p)
8838 /* Pool chunks can only be inserted after BARRIERs ... */
8839 if (BARRIER_P (insn))
8841 s390_end_pool (curr_pool, insn);
8846 /* ... so if we don't find one in time, create one. */
8847 else if (chunk_size > S390_POOL_CHUNK_MAX
8848 || curr_pool->size > S390_POOL_CHUNK_MAX
8849 || section_switch_p)
8851 rtx_insn *label, *jump, *barrier, *next, *prev;
8853 if (!section_switch_p)
8855 /* We can insert the barrier only after a 'real' insn. */
8856 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8858 if (get_attr_length (insn) == 0)
8860 /* Don't separate LTREL_BASE from the corresponding
8861 LTREL_OFFSET load. */
8868 next = NEXT_INSN (insn);
8872 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8873 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8877 gcc_assert (!pending_ltrel);
/* The old pool has to end before the section switch
   note in order to make it part of the current
   section.  */
8882 insn = PREV_INSN (insn);
8885 label = gen_label_rtx ();
8887 if (prev && NOTE_P (prev))
8888 prev = prev_nonnote_insn (prev);
8890 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8891 INSN_LOCATION (prev));
8893 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8894 barrier = emit_barrier_after (jump);
8895 insn = emit_label_after (label, barrier);
8896 JUMP_LABEL (jump) = label;
8897 LABEL_NUSES (label) = 1;
8899 INSN_ADDRESSES_NEW (jump, -1);
8900 INSN_ADDRESSES_NEW (barrier, -1);
8901 INSN_ADDRESSES_NEW (insn, -1);
8903 s390_end_pool (curr_pool, barrier);
8911 s390_end_pool (curr_pool, NULL);
8912 gcc_assert (!pending_ltrel);
8914 /* Find all labels that are branched into
8915 from an insn belonging to a different chunk. */
8917 far_labels = BITMAP_ALLOC (NULL);
8919 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8921 rtx_jump_table_data *table;
8923 /* Labels marked with LABEL_PRESERVE_P can be target
8924 of non-local jumps, so we have to mark them.
8925 The same holds for named labels.
   Don't do that, however, if it is the label before a jump table.  */
8931 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8933 rtx_insn *vec_insn = NEXT_INSN (insn);
8934 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8935 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8937 /* Check potential targets in a table jump (casesi_jump). */
8938 else if (tablejump_p (insn, NULL, &table))
8940 rtx vec_pat = PATTERN (table);
8941 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8943 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8945 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8947 if (s390_find_pool (pool_list, label)
8948 != s390_find_pool (pool_list, insn))
8949 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8952 /* If we have a direct jump (conditional or unconditional),
8953 check all potential targets. */
8954 else if (JUMP_P (insn))
8956 rtx pat = PATTERN (insn);
8958 if (GET_CODE (pat) == PARALLEL)
8959 pat = XVECEXP (pat, 0, 0);
8961 if (GET_CODE (pat) == SET)
8963 rtx label = JUMP_LABEL (insn);
8964 if (label && !ANY_RETURN_P (label))
8966 if (s390_find_pool (pool_list, label)
8967 != s390_find_pool (pool_list, insn))
8968 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8974 /* Insert base register reload insns before every pool. */
8976 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8978 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8980 rtx_insn *insn = curr_pool->first_insn;
8981 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8984 /* Insert base register reload insns at every far label. */
8986 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8988 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8990 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8993 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8995 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9000 BITMAP_FREE (far_labels);
9003 /* Recompute insn addresses. */
9005 init_insn_lengths ();
9006 shorten_branches (get_insns ());
9011 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9012 After we have decided to use this list, finish implementing
9013 all changes to the current function as required. */
9016 s390_chunkify_finish (struct constant_pool *pool_list)
9018 struct constant_pool *curr_pool = NULL;
9022 /* Replace all literal pool references. */
9024 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9027 replace_ltrel_base (&PATTERN (insn));
9029 curr_pool = s390_find_pool (pool_list, insn);
9033 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9035 rtx addr, pool_ref = NULL_RTX;
9036 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9039 if (s390_execute_label (insn))
9040 addr = s390_find_execute (curr_pool, insn);
9042 addr = s390_find_constant (curr_pool,
9043 get_pool_constant (pool_ref),
9044 get_pool_mode (pool_ref));
9046 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9047 INSN_CODE (insn) = -1;
9052 /* Dump out all literal pools. */
9054 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9055 s390_dump_pool (curr_pool, 0);
9057 /* Free pool list. */
9061 struct constant_pool *next = pool_list->next;
9062 s390_free_pool (pool_list);
9067 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9068 We have decided we cannot use this list, so revert all changes
9069 to the current function that were done by s390_chunkify_start. */
9072 s390_chunkify_cancel (struct constant_pool *pool_list)
9074 struct constant_pool *curr_pool = NULL;
9077 /* Remove all pool placeholder insns. */
9079 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9081 /* Did we insert an extra barrier? Remove it. */
9082 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9083 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9084 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9086 if (jump && JUMP_P (jump)
9087 && barrier && BARRIER_P (barrier)
9088 && label && LABEL_P (label)
9089 && GET_CODE (PATTERN (jump)) == SET
9090 && SET_DEST (PATTERN (jump)) == pc_rtx
9091 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9092 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9095 remove_insn (barrier);
9096 remove_insn (label);
9099 remove_insn (curr_pool->pool_insn);
9102 /* Remove all base register reload insns. */
9104 for (insn = get_insns (); insn; )
9106 rtx_insn *next_insn = NEXT_INSN (insn);
9108 if (NONJUMP_INSN_P (insn)
9109 && GET_CODE (PATTERN (insn)) == SET
9110 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9111 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9117 /* Free pool list. */
9121 struct constant_pool *next = pool_list->next;
9122 s390_free_pool (pool_list);
9127 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9130 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9132 switch (GET_MODE_CLASS (mode))
9135 case MODE_DECIMAL_FLOAT:
9136 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9138 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9142 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9143 mark_symbol_refs_as_used (exp);
9146 case MODE_VECTOR_INT:
9147 case MODE_VECTOR_FLOAT:
9150 machine_mode inner_mode;
9151 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9153 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9154 for (i = 0; i < XVECLEN (exp, 0); i++)
9155 s390_output_pool_entry (XVECEXP (exp, 0, i),
9159 : GET_MODE_BITSIZE (inner_mode));
9169 /* Return an RTL expression representing the value of the return address
9170 for the frame COUNT steps up from the current frame. FRAME is the
9171 frame pointer of that frame. */
9174 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9179 /* Without backchain, we fail for all but the current frame. */
9181 if (!TARGET_BACKCHAIN && count > 0)
9184 /* For the current frame, we need to make sure the initial
9185 value of RETURN_REGNUM is actually saved. */
9189 /* On non-z architectures branch splitting could overwrite r14. */
9190 if (TARGET_CPU_ZARCH)
9191 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9194 cfun_frame_layout.save_return_addr_p = true;
9195 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9199 if (TARGET_PACKED_STACK)
9200 offset = -2 * UNITS_PER_LONG;
9202 offset = RETURN_REGNUM * UNITS_PER_LONG;
9204 addr = plus_constant (Pmode, frame, offset);
9205 addr = memory_address (Pmode, addr);
9206 return gen_rtx_MEM (Pmode, addr);
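/* This is what __builtin_return_address expands to; e.g. (illustrative use,
   not from this file):

     void *caller_pc = __builtin_return_address (0);

   For COUNT > 0 the address must be read from the frame's save slot, which
   is only possible with -mbackchain.  */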
9209 /* Return an RTL expression representing the back chain stored in
9210 the current stack frame. */
9213 s390_back_chain_rtx (void)
9217 gcc_assert (TARGET_BACKCHAIN);
9219 if (TARGET_PACKED_STACK)
9220 chain = plus_constant (Pmode, stack_pointer_rtx,
9221 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9223 chain = stack_pointer_rtx;
9225 chain = gen_rtx_MEM (Pmode, chain);
/* Find the first call-clobbered register unused in a function.
   This could be used as a base register in a leaf function
   or for holding the return address before the epilogue.  */
9234 find_unused_clobbered_reg (void)
9237 for (i = 0; i < 6; i++)
9238 if (!df_regs_ever_live_p (i))
9244 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9245 clobbered hard regs in SETREG. */
9248 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9250 char *regs_ever_clobbered = (char *)data;
9251 unsigned int i, regno;
9252 machine_mode mode = GET_MODE (setreg);
9254 if (GET_CODE (setreg) == SUBREG)
9256 rtx inner = SUBREG_REG (setreg);
9257 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9259 regno = subreg_regno (setreg);
9261 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9262 regno = REGNO (setreg);
9267 i < regno + HARD_REGNO_NREGS (regno, mode);
9269 regs_ever_clobbered[i] = 1;
9272 /* Walks through all basic blocks of the current function looking
9273 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9274 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9275 each of those regs. */
9278 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9284 memset (regs_ever_clobbered, 0, 32);
/* For non-leaf functions we have to consider all call clobbered regs to be
   clobbered.  */
9290 for (i = 0; i < 32; i++)
9291 regs_ever_clobbered[i] = call_really_used_regs[i];
9294 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9295 this work is done by liveness analysis (mark_regs_live_at_end).
9296 Special care is needed for functions containing landing pads. Landing pads
9297 may use the eh registers, but the code which sets these registers is not
9298 contained in that function. Hence s390_regs_ever_clobbered is not able to
9299 deal with this automatically. */
9300 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9301 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9302 if (crtl->calls_eh_return
9303 || (cfun->machine->has_landing_pad_p
9304 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9305 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9307 /* For nonlocal gotos all call-saved registers have to be saved.
9308 This flag is also set for the unwinding code in libgcc.
9309 See expand_builtin_unwind_init. For regs_ever_live this is done by
9310 reload. */
9311 if (crtl->saves_all_registers)
9312 for (i = 0; i < 32; i++)
9313 if (!call_really_used_regs[i])
9314 regs_ever_clobbered[i] = 1;
9316 FOR_EACH_BB_FN (cur_bb, cfun)
9318 FOR_BB_INSNS (cur_bb, cur_insn)
9322 if (!INSN_P (cur_insn))
9325 pat = PATTERN (cur_insn);
9327 /* Ignore GPR restore insns. */
9328 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9330 if (GET_CODE (pat) == SET
9331 && GENERAL_REG_P (SET_DEST (pat)))
9334 if (GET_MODE (SET_SRC (pat)) == DImode
9335 && FP_REG_P (SET_SRC (pat)))
9339 if (GET_CODE (SET_SRC (pat)) == MEM)
9344 if (GET_CODE (pat) == PARALLEL
9345 && load_multiple_operation (pat, VOIDmode))
9350 s390_reg_clobbered_rtx,
9351 regs_ever_clobbered);
9356 /* Determine the frame area which actually has to be accessed
9357 in the function epilogue. The values are stored at the
9358 given pointers AREA_BOTTOM (address of the lowest used stack
9359 address) and AREA_TOP (address of the first item which does
9360 not belong to the stack frame). */
9363 s390_frame_area (int *area_bottom, int *area_top)
9370 if (cfun_frame_layout.first_restore_gpr != -1)
9372 b = (cfun_frame_layout.gprs_offset
9373 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9374 t = b + (cfun_frame_layout.last_restore_gpr
9375 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9378 if (TARGET_64BIT && cfun_save_high_fprs_p)
9380 b = MIN (b, cfun_frame_layout.f8_offset);
9381 t = MAX (t, (cfun_frame_layout.f8_offset
9382 + cfun_frame_layout.high_fprs * 8));
9387 if (cfun_fpr_save_p (FPR4_REGNUM))
9389 b = MIN (b, cfun_frame_layout.f4_offset);
9390 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9392 if (cfun_fpr_save_p (FPR6_REGNUM))
9394 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9395 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9401 /* Update gpr_save_slots in the frame layout trying to make use of
9402 FPRs as GPR save slots.
9403 This is a helper routine of s390_register_info. */
9406 s390_register_info_gprtofpr ()
9408 int save_reg_slot = FPR0_REGNUM;
9411 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9414 for (i = 15; i >= 6; i--)
9416 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9419 /* Advance to the next FP register which can be used as a
9420 GPR save slot. */
9421 while ((!call_really_used_regs[save_reg_slot]
9422 || df_regs_ever_live_p (save_reg_slot)
9423 || cfun_fpr_save_p (save_reg_slot))
9424 && FP_REGNO_P (save_reg_slot))
9426 if (!FP_REGNO_P (save_reg_slot))
9428 /* We only want to use ldgr/lgdr if we can get rid of
9429 stm/lm entirely. So undo the gpr slot allocation in
9430 case we ran out of FPR save slots. */
9431 for (j = 6; j <= 15; j++)
9432 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9433 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9436 cfun_gpr_save_slot (i) = save_reg_slot++;
9440 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9441 stdarg.
9442 This is a helper routine for s390_register_info. */
9445 s390_register_info_stdarg_fpr ()
9451 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9452 f0, f2, f4 and f6 for 64 bit. */
9454 || !TARGET_HARD_FLOAT
9455 || !cfun->va_list_fpr_size
9456 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9459 min_fpr = crtl->args.info.fprs;
9460 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9461 if (max_fpr >= FP_ARG_NUM_REG)
9462 max_fpr = FP_ARG_NUM_REG - 1;
9464 /* FPR argument regs start at f0. */
9465 min_fpr += FPR0_REGNUM;
9466 max_fpr += FPR0_REGNUM;
9468 for (i = min_fpr; i <= max_fpr; i++)
9469 cfun_set_fpr_save (i);
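/* Worked example (64 bit, hence FP_ARG_NUM_REG == 4): assume one FP
   argument has already been consumed (crtl->args.info.fprs == 1) and
   cfun->va_list_fpr_size is 3.  Then min_fpr = 1 and max_fpr = 3, and
   since the FPR argument registers occupy consecutive hard regnos
   starting at FPR0_REGNUM in this backend, the loop marks %f2, %f4
   and %f6 - exactly the argument FPRs a va_arg consumer might still
   read from the register save area.  */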
9472 /* Reserve the GPR save slots for GPRs which need to be saved due to
9473 stdarg.
9474 This is a helper routine for s390_register_info. */
9477 s390_register_info_stdarg_gpr ()
9484 || !cfun->va_list_gpr_size
9485 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9488 min_gpr = crtl->args.info.gprs;
9489 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9490 if (max_gpr >= GP_ARG_NUM_REG)
9491 max_gpr = GP_ARG_NUM_REG - 1;
9493 /* GPR argument regs start at r2. */
9494 min_gpr += GPR2_REGNUM;
9495 max_gpr += GPR2_REGNUM;
9497 /* If r6 was supposed to be saved into an FPR and now needs to go to
9498 the stack for vararg we have to adjust the restore range to make
9499 sure that the restore is done from stack as well. */
9500 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9501 && min_gpr <= GPR6_REGNUM
9502 && max_gpr >= GPR6_REGNUM)
9504 if (cfun_frame_layout.first_restore_gpr == -1
9505 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9506 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9507 if (cfun_frame_layout.last_restore_gpr == -1
9508 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9509 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9512 if (cfun_frame_layout.first_save_gpr == -1
9513 || cfun_frame_layout.first_save_gpr > min_gpr)
9514 cfun_frame_layout.first_save_gpr = min_gpr;
9516 if (cfun_frame_layout.last_save_gpr == -1
9517 || cfun_frame_layout.last_save_gpr < max_gpr)
9518 cfun_frame_layout.last_save_gpr = max_gpr;
9520 for (i = min_gpr; i <= max_gpr; i++)
9521 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
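/* Worked example (64 bit, GP_ARG_NUM_REG == 5): assume three argument
   GPRs are consumed (crtl->args.info.gprs == 3) and
   cfun->va_list_gpr_size is 2.  Then min_gpr = 3 and max_gpr = 4,
   which after the GPR2_REGNUM shift gives r5 and r6 a SAVE_SLOT_STACK
   entry; and because r6 is call-saved, the restore range is widened to
   include it as handled above.  */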
9524 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9525 prologue and epilogue. */
9528 s390_register_info_set_ranges ()
9532 /* Find the first and the last save slot supposed to use the stack
9533 to set the restore range.
9534 Vararg regs might be marked as saved to the stack but only the
9535 call-saved regs really need restoring (i.e. r6). This code
9536 assumes that the vararg regs have not yet been recorded in
9537 cfun_gpr_save_slot. */
9538 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9539 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9540 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9541 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9542 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9543 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
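/* For illustration: if only r13 and r15 end up with SAVE_SLOT_STACK,
   the two scans yield i == 13 and j == 15, so the stm(g)/lm(g) range
   becomes r13..r15 - r14 is stored as well since the multiple-register
   instructions cannot skip gaps.  If no slot uses the stack at all,
   i reaches 16 and all four fields are set to -1.  */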
9546 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9547 for registers which need to be saved in function prologue.
9548 This function can be used until the insns emitted for save/restore
9549 of the regs are visible in the RTL stream. */
9552 s390_register_info ()
9555 char clobbered_regs[32];
9557 gcc_assert (!epilogue_completed);
9559 if (reload_completed)
9560 /* After reload we rely on our own routine to determine which
9561 registers need saving. */
9562 s390_regs_ever_clobbered (clobbered_regs);
9564 /* During reload we use regs_ever_live as a base since reload
9565 makes changes there which we otherwise would not be aware
9566 of. */
9567 for (i = 0; i < 32; i++)
9568 clobbered_regs[i] = df_regs_ever_live_p (i);
9570 for (i = 0; i < 32; i++)
9571 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9573 /* Mark the call-saved FPRs which need to be saved.
9574 This needs to be done before checking the special GPRs since the
9575 stack pointer usage depends on whether high FPRs have to be saved
9576 or not. */
9577 cfun_frame_layout.fpr_bitmap = 0;
9578 cfun_frame_layout.high_fprs = 0;
9579 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9580 if (clobbered_regs[i] && !call_really_used_regs[i])
9582 cfun_set_fpr_save (i);
9583 if (i >= FPR8_REGNUM)
9584 cfun_frame_layout.high_fprs++;
9587 /* Register 12 is used for GOT address, but also as temp in prologue
9588 for split-stack stdarg functions (unless r14 is available). */
9590 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9591 || (flag_split_stack && cfun->stdarg
9592 && (crtl->is_leaf || TARGET_TPF_PROFILING
9593 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9595 clobbered_regs[BASE_REGNUM]
9596 |= (cfun->machine->base_reg
9597 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9599 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9600 |= !!frame_pointer_needed;
9602 /* On pre z900 machines this might take until machine dependent
9603 reorg to decide.
9604 save_return_addr_p will only be set on non-zarch machines so
9605 there is no risk that r14 goes into an FPR instead of a stack
9606 slot. */
9607 clobbered_regs[RETURN_REGNUM]
9609 || TARGET_TPF_PROFILING
9610 || cfun->machine->split_branches_pending_p
9611 || cfun_frame_layout.save_return_addr_p
9612 || crtl->calls_eh_return);
9614 clobbered_regs[STACK_POINTER_REGNUM]
9616 || TARGET_TPF_PROFILING
9617 || cfun_save_high_fprs_p
9618 || get_frame_size () > 0
9619 || (reload_completed && cfun_frame_layout.frame_size > 0)
9620 || cfun->calls_alloca);
9622 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9624 for (i = 6; i < 16; i++)
9625 if (clobbered_regs[i])
9626 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9628 s390_register_info_stdarg_fpr ();
9629 s390_register_info_gprtofpr ();
9630 s390_register_info_set_ranges ();
9631 /* stdarg functions might need to save GPRs 2 to 6. This might
9632 override the GPR->FPR save decision made by
9633 s390_register_info_gprtofpr for r6 since vararg regs must go to
9634 the stack. */
9635 s390_register_info_stdarg_gpr ();
9638 /* This function is called by s390_optimize_prologue in order to get
9639 rid of unnecessary GPR save/restore instructions. The register info
9640 for the GPRs is re-computed and the ranges are re-calculated. */
9643 s390_optimize_register_info ()
9645 char clobbered_regs[32];
9648 gcc_assert (epilogue_completed);
9649 gcc_assert (!cfun->machine->split_branches_pending_p);
9651 s390_regs_ever_clobbered (clobbered_regs);
9653 for (i = 0; i < 32; i++)
9654 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9656 /* There is still special treatment needed for cases invisible to
9657 s390_regs_ever_clobbered. */
9658 clobbered_regs[RETURN_REGNUM]
9659 |= (TARGET_TPF_PROFILING
9660 /* When expanding builtin_return_addr in ESA mode we do not
9661 know whether r14 will later be needed as scratch reg when
9662 doing branch splitting. So the builtin always accesses the
9663 r14 save slot and we need to stick to the save/restore
9664 decision for r14 even if it turns out that it didn't get
9665 clobbered. */
9666 || cfun_frame_layout.save_return_addr_p
9667 || crtl->calls_eh_return);
9669 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9671 for (i = 6; i < 16; i++)
9672 if (!clobbered_regs[i])
9673 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9675 s390_register_info_set_ranges ();
9676 s390_register_info_stdarg_gpr ();
9679 /* Fill cfun->machine with info about frame of current function. */
9682 s390_frame_info (void)
9684 HOST_WIDE_INT lowest_offset;
9686 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9687 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9689 /* The va_arg builtin uses a constant distance of 16 *
9690 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9691 pointer. So even if we are going to save the stack pointer in an
9692 FPR we need the stack space in order to keep the offsets
9693 valid. */
9694 if (cfun->stdarg && cfun_save_arg_fprs_p)
9696 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9698 if (cfun_frame_layout.first_save_gpr_slot == -1)
9699 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9702 cfun_frame_layout.frame_size = get_frame_size ();
9703 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9704 fatal_error (input_location,
9705 "total size of local variables exceeds architecture limit");
9707 if (!TARGET_PACKED_STACK)
9709 /* Fixed stack layout. */
9710 cfun_frame_layout.backchain_offset = 0;
9711 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9712 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9713 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9714 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9717 else if (TARGET_BACKCHAIN)
9719 /* Kernel stack layout - packed stack, backchain, no float */
9720 gcc_assert (TARGET_SOFT_FLOAT);
9721 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9724 /* The distance between the backchain and the return address
9725 save slot must not change. So we always need a slot for the
9726 stack pointer which resides in between. */
9727 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9729 cfun_frame_layout.gprs_offset
9730 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9732 /* FPRs will not be saved. Nevertheless pick sane values to
9733 keep area calculations valid. */
9734 cfun_frame_layout.f0_offset =
9735 cfun_frame_layout.f4_offset =
9736 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9742 /* Packed stack layout without backchain. */
9744 /* With stdarg FPRs need their dedicated slots. */
9745 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9746 : (cfun_fpr_save_p (FPR4_REGNUM) +
9747 cfun_fpr_save_p (FPR6_REGNUM)));
9748 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9750 num_fprs = (cfun->stdarg ? 2
9751 : (cfun_fpr_save_p (FPR0_REGNUM)
9752 + cfun_fpr_save_p (FPR2_REGNUM)));
9753 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9755 cfun_frame_layout.gprs_offset
9756 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9758 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9759 - cfun_frame_layout.high_fprs * 8);
9762 if (cfun_save_high_fprs_p)
9763 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9766 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9768 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9769 sized area at the bottom of the stack. This is required also for
9770 leaf functions. When GCC generates a local stack reference it
9771 will always add STACK_POINTER_OFFSET to all these references. */
9773 && !TARGET_TPF_PROFILING
9774 && cfun_frame_layout.frame_size == 0
9775 && !cfun->calls_alloca)
9778 /* Calculate the number of bytes we have used in our own register
9779 save area. With the packed stack layout we can re-use the
9780 remaining bytes for normal stack elements. */
9782 if (TARGET_PACKED_STACK)
9783 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9784 cfun_frame_layout.f4_offset),
9785 cfun_frame_layout.gprs_offset);
9789 if (TARGET_BACKCHAIN)
9790 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9792 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9794 /* If under 31 bit an odd number of gprs has to be saved we have to
9795 adjust the frame size to sustain 8 byte alignment of stack
9796 frames. */
9797 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9798 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9799 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
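/* The rounding above is the usual power-of-two round-up.  A minimal
   sketch, assuming the 8-byte stack alignment used here
   (STACK_BOUNDARY / BITS_PER_UNIT == 8):

     static HOST_WIDE_INT round_frame_size (HOST_WIDE_INT size)
     {
       return (size + 8 - 1) & ~(HOST_WIDE_INT) (8 - 1);
     }

   e.g. a 31-bit frame whose save area holds three 4-byte GPR slots
   (12 bytes) is padded to 16 bytes.  */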
9802 /* Generate frame layout. Fills in register and frame data for the current
9803 function in cfun->machine. This routine can be called multiple times;
9804 it will re-do the complete frame layout every time. */
9807 s390_init_frame_layout (void)
9809 HOST_WIDE_INT frame_size;
9812 /* After LRA the frame layout is supposed to be read-only and should
9813 not be re-computed. */
9814 if (reload_completed)
9817 /* On S/390 machines, we may need to perform branch splitting, which
9818 will require both base and return address register. We have no
9819 choice but to assume we're going to need them until right at the
9820 end of the machine dependent reorg phase. */
9821 if (!TARGET_CPU_ZARCH)
9822 cfun->machine->split_branches_pending_p = true;
9826 frame_size = cfun_frame_layout.frame_size;
9828 /* Try to predict whether we'll need the base register. */
9829 base_used = cfun->machine->split_branches_pending_p
9830 || crtl->uses_const_pool
9831 || (!DISP_IN_RANGE (frame_size)
9832 && !CONST_OK_FOR_K (frame_size));
9834 /* Decide which register to use as literal pool base. In small
9835 leaf functions, try to use an unused call-clobbered register
9836 as base register to avoid save/restore overhead. */
9838 cfun->machine->base_reg = NULL_RTX;
9844 /* Prefer r5 (most likely to be free). */
9845 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9847 cfun->machine->base_reg =
9848 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9851 s390_register_info ();
9854 while (frame_size != cfun_frame_layout.frame_size);
9857 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9858 the TX is nonescaping. A transaction is considered escaping if
9859 there is at least one path from tbegin returning CC0 to the
9860 function exit block without a tend.
9862 The check so far has some limitations:
9863 - only single tbegin/tend BBs are supported
9864 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9865 - when CC is copied to a GPR and the CC0 check is done with the GPR
9866 this is not supported
9870 s390_optimize_nonescaping_tx (void)
9872 const unsigned int CC0 = 1 << 3;
9873 basic_block tbegin_bb = NULL;
9874 basic_block tend_bb = NULL;
9879 rtx_insn *tbegin_insn = NULL;
9881 if (!cfun->machine->tbegin_p)
9884 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9886 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9891 FOR_BB_INSNS (bb, insn)
9893 rtx ite, cc, pat, target;
9894 unsigned HOST_WIDE_INT mask;
9896 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9899 pat = PATTERN (insn);
9901 if (GET_CODE (pat) == PARALLEL)
9902 pat = XVECEXP (pat, 0, 0);
9904 if (GET_CODE (pat) != SET
9905 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9908 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9914 /* Just return if the tbegin doesn't have clobbers. */
9915 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9918 if (tbegin_bb != NULL)
9921 /* Find the next conditional jump. */
9922 for (tmp = NEXT_INSN (insn);
9924 tmp = NEXT_INSN (tmp))
9926 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9931 ite = SET_SRC (PATTERN (tmp));
9932 if (GET_CODE (ite) != IF_THEN_ELSE)
9935 cc = XEXP (XEXP (ite, 0), 0);
9936 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9937 || GET_MODE (cc) != CCRAWmode
9938 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9941 if (bb->succs->length () != 2)
9944 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9945 if (GET_CODE (XEXP (ite, 0)) == NE)
9949 target = XEXP (ite, 1);
9950 else if (mask == (CC0 ^ 0xf))
9951 target = XEXP (ite, 2);
9959 ei = ei_start (bb->succs);
9960 e1 = ei_safe_edge (ei);
9962 e2 = ei_safe_edge (ei);
9964 if (e2->flags & EDGE_FALLTHRU)
9967 e1 = ei_safe_edge (ei);
9970 if (!(e1->flags & EDGE_FALLTHRU))
9973 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9975 if (tmp == BB_END (bb))
9980 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9982 if (tend_bb != NULL)
9989 /* Either we successfully remove the FPR clobbers here or we are not
9990 able to do anything for this TX. Both cases don't qualify for
9991 a second attempt. */
9992 cfun->machine->tbegin_p = false;
9994 if (tbegin_bb == NULL || tend_bb == NULL)
9997 calculate_dominance_info (CDI_POST_DOMINATORS);
9998 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9999 free_dominance_info (CDI_POST_DOMINATORS);
10004 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10006 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10007 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10008 INSN_CODE (tbegin_insn) = -1;
10009 df_insn_rescan (tbegin_insn);
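/* For illustration: CCRAWmode comparisons encode the condition-code
   mask as a one-hot value per CC, with CC0 == 1 << 3 as defined above.
   A branch whose mask is 8 therefore selects exactly the CC0
   ("transaction started") outcome, while mask CC0 ^ 0xf == 7 selects
   the three non-CC0 outcomes; this is how the code above decides which
   edge of the conditional jump continues on the transaction path.  */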
10014 /* Return true if it is legal to put a value with MODE into REGNO. */
10017 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10019 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10022 switch (REGNO_REG_CLASS (regno))
10025 return ((GET_MODE_CLASS (mode) == MODE_INT
10026 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10028 || s390_vector_mode_supported_p (mode));
10032 && ((GET_MODE_CLASS (mode) == MODE_INT
10033 && s390_class_max_nregs (FP_REGS, mode) == 1)
10035 || s390_vector_mode_supported_p (mode)))
10038 if (REGNO_PAIR_OK (regno, mode))
10040 if (mode == SImode || mode == DImode)
10043 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10048 if (FRAME_REGNO_P (regno) && mode == Pmode)
10053 if (REGNO_PAIR_OK (regno, mode))
10056 || (mode != TFmode && mode != TCmode && mode != TDmode))
10061 if (GET_MODE_CLASS (mode) == MODE_CC)
10065 if (REGNO_PAIR_OK (regno, mode))
10067 if (mode == SImode || mode == Pmode)
10078 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10081 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10083 /* Once we've decided upon a register to use as base register, it must
10084 no longer be used for any other purpose. */
10085 if (cfun->machine->base_reg)
10086 if (REGNO (cfun->machine->base_reg) == old_reg
10087 || REGNO (cfun->machine->base_reg) == new_reg)
10090 /* Prevent regrename from using call-saved regs which haven't
10091 actually been saved. This is necessary since regrename assumes
10092 the backend save/restore decisions are based on
10093 df_regs_ever_live. Since we have our own routine we have to tell
10094 regrename manually about it. */
10095 if (GENERAL_REGNO_P (new_reg)
10096 && !call_really_used_regs[new_reg]
10097 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10103 /* Return nonzero if register REGNO can be used as a scratch register
10104 in peephole2. */
10107 s390_hard_regno_scratch_ok (unsigned int regno)
10109 /* See s390_hard_regno_rename_ok. */
10110 if (GENERAL_REGNO_P (regno)
10111 && !call_really_used_regs[regno]
10112 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10118 /* Maximum number of registers to represent a value of mode MODE
10119 in a register of class RCLASS. */
10122 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10125 bool reg_pair_required_p = false;
10131 reg_size = TARGET_VX ? 16 : 8;
10133 /* TF and TD modes would fit into a VR but we put them into a
10134 register pair since we do not have 128bit FP instructions on
10135 full VRs. */
10136 if (TARGET_VX
10137 && SCALAR_FLOAT_MODE_P (mode)
10138 && GET_MODE_SIZE (mode) >= 16)
10139 reg_pair_required_p = true;
10141 /* Even if complex types would fit into a single FPR/VR we force
10142 them into a register pair to deal with the parts more easily.
10143 (FIXME: What about complex ints?) */
10144 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10145 reg_pair_required_p = true;
10151 reg_size = UNITS_PER_WORD;
10155 if (reg_pair_required_p)
10156 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10158 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
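/* Worked examples for the arithmetic above (64 bit):

     TFmode (16 bytes) in FP_REGS with VX (reg_size == 16):
       pair required -> 2 * ((16/2 + 16 - 1) / 16) == 2 registers
     TFmode in FP_REGS without VX (reg_size == 8):
       pair required -> 2 * ((16/2 + 8 - 1) / 8) == 2 registers
     TImode (16 bytes) in GENERAL_REGS (reg_size == UNITS_PER_WORD == 8):
       (16 + 8 - 1) / 8 == 2 registers  */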
10161 /* Return TRUE if changing mode from FROM to TO should not be allowed
10162 for register class CLASS. */
10165 s390_cannot_change_mode_class (machine_mode from_mode,
10166 machine_mode to_mode,
10167 enum reg_class rclass)
10169 machine_mode small_mode;
10170 machine_mode big_mode;
10172 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10175 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10177 small_mode = from_mode;
10178 big_mode = to_mode;
10182 small_mode = to_mode;
10183 big_mode = from_mode;
10186 /* Values residing in VRs are little-endian style. All modes are
10187 placed left-aligned in a VR. This means that we cannot allow
10188 switching between modes with differing sizes. Also if the vector
10189 facility is available we still place TFmode values in VR register
10190 pairs, since the only instructions we have operating on TFmode
10191 deal with register pairs. Therefore we have to allow DFmode
10192 subregs of TFmodes to enable the TFmode splitters. */
10193 if (reg_classes_intersect_p (VEC_REGS, rclass)
10194 && (GET_MODE_SIZE (small_mode) < 8
10195 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10198 /* Likewise for access registers, since they have only half the
10199 word size on 64-bit. */
10200 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10206 /* Return true if we use LRA instead of the reload pass. */
10210 return s390_lra_flag;
10213 /* Return true if register FROM can be eliminated via register TO. */
10216 s390_can_eliminate (const int from, const int to)
10218 /* On zSeries machines, we have not marked the base register as fixed.
10219 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10220 If a function requires the base register, we say here that this
10221 elimination cannot be performed. This will cause reload to free
10222 up the base register (as if it were fixed). On the other hand,
10223 if the current function does *not* require the base register, we
10224 say here the elimination succeeds, which in turn allows reload
10225 to allocate the base register for any other purpose. */
10226 if (from == BASE_REGNUM && to == BASE_REGNUM)
10228 if (TARGET_CPU_ZARCH)
10230 s390_init_frame_layout ();
10231 return cfun->machine->base_reg == NULL_RTX;
10237 /* Everything else must point into the stack frame. */
10238 gcc_assert (to == STACK_POINTER_REGNUM
10239 || to == HARD_FRAME_POINTER_REGNUM);
10241 gcc_assert (from == FRAME_POINTER_REGNUM
10242 || from == ARG_POINTER_REGNUM
10243 || from == RETURN_ADDRESS_POINTER_REGNUM);
10245 /* Make sure we actually saved the return address. */
10246 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10247 if (!crtl->calls_eh_return
10249 && !cfun_frame_layout.save_return_addr_p)
10255 /* Return offset between register FROM and TO initially after prolog. */
10258 s390_initial_elimination_offset (int from, int to)
10260 HOST_WIDE_INT offset;
10262 /* ??? Why are we called for non-eliminable pairs? */
10263 if (!s390_can_eliminate (from, to))
10268 case FRAME_POINTER_REGNUM:
10269 offset = (get_frame_size()
10270 + STACK_POINTER_OFFSET
10271 + crtl->outgoing_args_size);
10274 case ARG_POINTER_REGNUM:
10275 s390_init_frame_layout ();
10276 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10279 case RETURN_ADDRESS_POINTER_REGNUM:
10280 s390_init_frame_layout ();
10282 if (cfun_frame_layout.first_save_gpr_slot == -1)
10284 /* If it turns out that for stdarg nothing went into the reg
10285 save area we also do not need the return address
10286 save slot. */
10287 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10290 gcc_unreachable ();
10293 /* In order to make the following work it is not necessary for
10294 r14 to have a save slot. It is sufficient if one other GPR
10295 got one. Since the GPRs are always stored without gaps we
10296 are able to calculate where the r14 save slot would
10297 reside. */
10298 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10299 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10308 gcc_unreachable ();
10314 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10315 to register BASE. Return generated insn. */
10318 save_fpr (rtx base, int offset, int regnum)
10321 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10323 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10324 set_mem_alias_set (addr, get_varargs_alias_set ());
10326 set_mem_alias_set (addr, get_frame_alias_set ());
10328 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10331 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10332 to register BASE. Return generated insn. */
10335 restore_fpr (rtx base, int offset, int regnum)
10338 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10339 set_mem_alias_set (addr, get_frame_alias_set ());
10341 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10344 /* Return true if REGNO is a global register, but not one
10345 of the special ones that need to be saved/restored anyway. */
10348 global_not_special_regno_p (int regno)
10350 return (global_regs[regno]
10351 /* These registers are special and need to be
10352 restored in any case. */
10353 && !(regno == STACK_POINTER_REGNUM
10354 || regno == RETURN_REGNUM
10355 || regno == BASE_REGNUM
10356 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10359 /* Generate insn to save registers FIRST to LAST into
10360 the register save area located at offset OFFSET
10361 relative to register BASE. */
10364 save_gprs (rtx base, int offset, int first, int last)
10366 rtx addr, insn, note;
10369 addr = plus_constant (Pmode, base, offset);
10370 addr = gen_rtx_MEM (Pmode, addr);
10372 set_mem_alias_set (addr, get_frame_alias_set ());
10374 /* Special-case single register. */
10378 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10380 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10382 if (!global_not_special_regno_p (first))
10383 RTX_FRAME_RELATED_P (insn) = 1;
10388 insn = gen_store_multiple (addr,
10389 gen_rtx_REG (Pmode, first),
10390 GEN_INT (last - first + 1));
10392 if (first <= 6 && cfun->stdarg)
10393 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10395 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10397 if (first + i <= 6)
10398 set_mem_alias_set (mem, get_varargs_alias_set ());
10401 /* We need to set the FRAME_RELATED flag on all SETs
10402 inside the store-multiple pattern.
10404 However, we must not emit DWARF records for registers 2..5
10405 if they are stored for use by variable arguments ...
10407 ??? Unfortunately, it is not enough to simply not set the
10408 FRAME_RELATED flags for those SETs, because the first SET
10409 of the PARALLEL is always treated as if it had the flag
10410 set, even if it does not. Therefore we emit a new pattern
10411 without those registers as REG_FRAME_RELATED_EXPR note. */
10413 if (first >= 6 && !global_not_special_regno_p (first))
10415 rtx pat = PATTERN (insn);
10417 for (i = 0; i < XVECLEN (pat, 0); i++)
10418 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10419 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10421 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10423 RTX_FRAME_RELATED_P (insn) = 1;
10425 else if (last >= 6)
10429 for (start = first >= 6 ? first : 6; start <= last; start++)
10430 if (!global_not_special_regno_p (start))
10436 addr = plus_constant (Pmode, base,
10437 offset + (start - first) * UNITS_PER_LONG);
10442 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10443 gen_rtx_REG (Pmode, start));
10445 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10446 gen_rtx_REG (Pmode, start));
10447 note = PATTERN (note);
10449 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10450 RTX_FRAME_RELATED_P (insn) = 1;
10455 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10456 gen_rtx_REG (Pmode, start),
10457 GEN_INT (last - start + 1));
10458 note = PATTERN (note);
10460 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10462 for (i = 0; i < XVECLEN (note, 0); i++)
10463 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10464 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10466 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10468 RTX_FRAME_RELATED_P (insn) = 1;
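/* For illustration: in a stdarg function saving r2..r9 the emitted
   stm(g) stores all eight registers, but the REG_FRAME_RELATED_EXPR
   note built above describes only the stores of r6..r9, so no DWARF
   save records are produced for the vararg copies of r2..r5.  */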
10474 /* Generate insn to restore registers FIRST to LAST from
10475 the register save area located at offset OFFSET
10476 relative to register BASE. */
10479 restore_gprs (rtx base, int offset, int first, int last)
10483 addr = plus_constant (Pmode, base, offset);
10484 addr = gen_rtx_MEM (Pmode, addr);
10485 set_mem_alias_set (addr, get_frame_alias_set ());
10487 /* Special-case single register. */
10491 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10493 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10495 RTX_FRAME_RELATED_P (insn) = 1;
10499 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10501 GEN_INT (last - first + 1));
10502 RTX_FRAME_RELATED_P (insn) = 1;
10506 /* Return insn sequence to load the GOT register. */
10508 static GTY(()) rtx got_symbol;
10510 s390_load_got (void)
10514 /* We cannot use pic_offset_table_rtx here since we use this
10515 function also for non-pic if __tls_get_offset is called and in
10516 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10517 aren't usable. */
10518 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10522 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10523 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10528 if (TARGET_CPU_ZARCH)
10530 emit_move_insn (got_rtx, got_symbol);
10536 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10537 UNSPEC_LTREL_OFFSET);
10538 offset = gen_rtx_CONST (Pmode, offset);
10539 offset = force_const_mem (Pmode, offset);
10541 emit_move_insn (got_rtx, offset);
10543 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10544 UNSPEC_LTREL_BASE);
10545 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10547 emit_move_insn (got_rtx, offset);
10550 insns = get_insns ();
10555 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10556 and the change to the stack pointer. */
10559 s390_emit_stack_tie (void)
10561 rtx mem = gen_frame_mem (BLKmode,
10562 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10564 emit_insn (gen_stack_tie (mem));
10567 /* Copy GPRS into FPR save slots. */
10570 s390_save_gprs_to_fprs (void)
10574 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10577 for (i = 6; i < 16; i++)
10579 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10582 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10583 gen_rtx_REG (DImode, i));
10584 RTX_FRAME_RELATED_P (insn) = 1;
10585 /* This prevents dwarf2cfi from interpreting the set. Doing
10586 so it might emit def_cfa_register infos setting an FPR as
10587 new CFA. */
10588 add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10593 /* Restore GPRs from FPR save slots. */
10596 s390_restore_gprs_from_fprs (void)
10600 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10603 for (i = 6; i < 16; i++)
10607 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10610 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10612 if (i == STACK_POINTER_REGNUM)
10613 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10615 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10617 df_set_regs_ever_live (i, true);
10618 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10619 if (i == STACK_POINTER_REGNUM)
10620 add_reg_note (insn, REG_CFA_DEF_CFA,
10621 plus_constant (Pmode, stack_pointer_rtx,
10622 STACK_POINTER_OFFSET));
10623 RTX_FRAME_RELATED_P (insn) = 1;
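/* For illustration: on z10 and later a leaf function can keep, say,
   r11 in an unused call-clobbered FPR across its body - the prologue
   copies it with LDGR and the code above restores it with LGDR -
   avoiding the stm/lm memory traffic entirely when every callee-saved
   GPR finds such an FPR (see s390_register_info_gprtofpr).  */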
10628 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10629 generation. */
10631 namespace {
10633 const pass_data pass_data_s390_early_mach =
10635 RTL_PASS, /* type */
10636 "early_mach", /* name */
10637 OPTGROUP_NONE, /* optinfo_flags */
10638 TV_MACH_DEP, /* tv_id */
10639 0, /* properties_required */
10640 0, /* properties_provided */
10641 0, /* properties_destroyed */
10642 0, /* todo_flags_start */
10643 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10646 class pass_s390_early_mach : public rtl_opt_pass
10649 pass_s390_early_mach (gcc::context *ctxt)
10650 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10653 /* opt_pass methods: */
10654 virtual unsigned int execute (function *);
10656 }; // class pass_s390_early_mach
10659 pass_s390_early_mach::execute (function *fun)
10663 /* Try to get rid of the FPR clobbers. */
10664 s390_optimize_nonescaping_tx ();
10666 /* Re-compute register info. */
10667 s390_register_info ();
10669 /* If we're using a base register, ensure that it is always valid for
10670 the first non-prologue instruction. */
10671 if (fun->machine->base_reg)
10672 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10674 /* Annotate all constant pool references to let the scheduler know
10675 they implicitly use the base register. */
10676 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10679 annotate_constant_pool_refs (&PATTERN (insn));
10680 df_insn_rescan (insn);
10685 } // anon namespace
10687 /* Expand the prologue into a bunch of separate insns. */
10690 s390_emit_prologue (void)
10698 /* Choose best register to use for temp use within prologue.
10699 TPF with profiling must avoid register 14 - the tracing function
10700 needs the original contents of r14 to be preserved. */
10702 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10704 && !TARGET_TPF_PROFILING)
10705 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10706 else if (flag_split_stack && cfun->stdarg)
10707 temp_reg = gen_rtx_REG (Pmode, 12);
10709 temp_reg = gen_rtx_REG (Pmode, 1);
10711 s390_save_gprs_to_fprs ();
10713 /* Save call saved gprs. */
10714 if (cfun_frame_layout.first_save_gpr != -1)
10716 insn = save_gprs (stack_pointer_rtx,
10717 cfun_frame_layout.gprs_offset +
10718 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10719 - cfun_frame_layout.first_save_gpr_slot),
10720 cfun_frame_layout.first_save_gpr,
10721 cfun_frame_layout.last_save_gpr);
10725 /* Dummy insn to mark literal pool slot. */
10727 if (cfun->machine->base_reg)
10728 emit_insn (gen_main_pool (cfun->machine->base_reg));
10730 offset = cfun_frame_layout.f0_offset;
10732 /* Save f0 and f2. */
10733 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10735 if (cfun_fpr_save_p (i))
10737 save_fpr (stack_pointer_rtx, offset, i);
10740 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10744 /* Save f4 and f6. */
10745 offset = cfun_frame_layout.f4_offset;
10746 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10748 if (cfun_fpr_save_p (i))
10750 insn = save_fpr (stack_pointer_rtx, offset, i);
10753 /* If f4 and f6 are call clobbered they are saved due to
10754 stdarg and therefore are not frame related. */
10755 if (!call_really_used_regs[i])
10756 RTX_FRAME_RELATED_P (insn) = 1;
10758 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10762 if (TARGET_PACKED_STACK
10763 && cfun_save_high_fprs_p
10764 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10766 offset = (cfun_frame_layout.f8_offset
10767 + (cfun_frame_layout.high_fprs - 1) * 8);
10769 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10770 if (cfun_fpr_save_p (i))
10772 insn = save_fpr (stack_pointer_rtx, offset, i);
10774 RTX_FRAME_RELATED_P (insn) = 1;
10777 if (offset >= cfun_frame_layout.f8_offset)
10781 if (!TARGET_PACKED_STACK)
10782 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10784 if (flag_stack_usage_info)
10785 current_function_static_stack_size = cfun_frame_layout.frame_size;
10787 /* Decrement stack pointer. */
10789 if (cfun_frame_layout.frame_size > 0)
10791 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10792 rtx real_frame_off;
10794 if (s390_stack_size)
10796 HOST_WIDE_INT stack_guard;
10798 if (s390_stack_guard)
10799 stack_guard = s390_stack_guard;
10802 /* If no value for stack guard is provided the smallest power of 2
10803 larger than the current frame size is chosen. */
10805 while (stack_guard < cfun_frame_layout.frame_size)
10809 if (cfun_frame_layout.frame_size >= s390_stack_size)
10811 warning (0, "frame size of function %qs is %wd"
10812 " bytes exceeding user provided stack limit of "
10814 "An unconditional trap is added.",
10815 current_function_name(), cfun_frame_layout.frame_size,
10817 emit_insn (gen_trap ());
10822 /* stack_guard has to be smaller than s390_stack_size.
10823 Otherwise we would emit an AND with zero which would
10824 not match the test under mask pattern. */
10825 if (stack_guard >= s390_stack_size)
10827 warning (0, "frame size of function %qs is %wd"
10828 " bytes which is more than half the stack size. "
10829 "The dynamic check would not be reliable. "
10830 "No check emitted for this function.",
10831 current_function_name(),
10832 cfun_frame_layout.frame_size);
10836 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10837 & ~(stack_guard - 1));
10839 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10840 GEN_INT (stack_check_mask));
10842 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10844 t, const0_rtx, const0_rtx));
10846 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10848 t, const0_rtx, const0_rtx));
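/* Worked example: with -mstack-size=262144 and -mstack-guard=4096 the
   mask becomes (0x40000 - 1) & ~(0x1000 - 1) == 0x3f000, so the
   conditional trap above fires once the stack pointer drops into the
   lowest guard-sized block of the stack area, i.e. when
   (sp & 0x3f000) == 0.  */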
10853 if (s390_warn_framesize > 0
10854 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10855 warning (0, "frame size of %qs is %wd bytes",
10856 current_function_name (), cfun_frame_layout.frame_size);
10858 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10859 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10861 /* Save incoming stack pointer into temp reg. */
10862 if (TARGET_BACKCHAIN || next_fpr)
10863 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10865 /* Subtract frame size from stack pointer. */
10867 if (DISP_IN_RANGE (INTVAL (frame_off)))
10869 insn = gen_rtx_SET (stack_pointer_rtx,
10870 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10872 insn = emit_insn (insn);
10876 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10877 frame_off = force_const_mem (Pmode, frame_off);
10879 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10880 annotate_constant_pool_refs (&PATTERN (insn));
10883 RTX_FRAME_RELATED_P (insn) = 1;
10884 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10885 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10886 gen_rtx_SET (stack_pointer_rtx,
10887 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10890 /* Set backchain. */
10892 if (TARGET_BACKCHAIN)
10894 if (cfun_frame_layout.backchain_offset)
10895 addr = gen_rtx_MEM (Pmode,
10896 plus_constant (Pmode, stack_pointer_rtx,
10897 cfun_frame_layout.backchain_offset));
10899 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10900 set_mem_alias_set (addr, get_frame_alias_set ());
10901 insn = emit_insn (gen_move_insn (addr, temp_reg));
10904 /* If we support non-call exceptions (e.g. for Java),
10905 we need to make sure the backchain pointer is set up
10906 before any possibly trapping memory access. */
10907 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10909 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10910 emit_clobber (addr);
10914 /* Save fprs 8 - 15 (64 bit ABI). */
10916 if (cfun_save_high_fprs_p && next_fpr)
10918 /* If the stack might be accessed through a different register
10919 we have to make sure that the stack pointer decrement is not
10920 moved below the use of the stack slots. */
10921 s390_emit_stack_tie ();
10923 insn = emit_insn (gen_add2_insn (temp_reg,
10924 GEN_INT (cfun_frame_layout.f8_offset)));
10928 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10929 if (cfun_fpr_save_p (i))
10931 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10932 cfun_frame_layout.frame_size
10933 + cfun_frame_layout.f8_offset
10936 insn = save_fpr (temp_reg, offset, i);
10938 RTX_FRAME_RELATED_P (insn) = 1;
10939 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10940 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10941 gen_rtx_REG (DFmode, i)));
10945 /* Set frame pointer, if needed. */
10947 if (frame_pointer_needed)
10949 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10950 RTX_FRAME_RELATED_P (insn) = 1;
10953 /* Set up got pointer, if needed. */
10955 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10957 rtx_insn *insns = s390_load_got ();
10959 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10960 annotate_constant_pool_refs (&PATTERN (insn));
10965 if (TARGET_TPF_PROFILING)
10967 /* Generate a BAS instruction to serve as a function
10968 entry intercept to facilitate the use of tracing
10969 algorithms located at the branch target. */
10970 emit_insn (gen_prologue_tpf ());
10972 /* Emit a blockage here so that all code
10973 lies between the profiling mechanisms. */
10974 emit_insn (gen_blockage ());
10978 /* Expand the epilogue into a bunch of separate insns. */
10981 s390_emit_epilogue (bool sibcall)
10983 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10984 int area_bottom, area_top, offset = 0;
10989 if (TARGET_TPF_PROFILING)
10992 /* Generate a BAS instruction to serve as a function
10993 exit intercept to facilitate the use of tracing
10994 algorithms located at the branch target. */
10996 /* Emit a blockage here so that all code
10997 lies between the profiling mechanisms. */
10998 emit_insn (gen_blockage ());
11000 emit_insn (gen_epilogue_tpf ());
11003 /* Check whether to use frame or stack pointer for restore. */
11005 frame_pointer = (frame_pointer_needed
11006 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11008 s390_frame_area (&area_bottom, &area_top);
11010 /* Check whether we can access the register save area.
11011 If not, increment the frame pointer as required. */
11013 if (area_top <= area_bottom)
11015 /* Nothing to restore. */
11017 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11018 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11020 /* Area is in range. */
11021 offset = cfun_frame_layout.frame_size;
11025 rtx insn, frame_off, cfa;
11027 offset = area_bottom < 0 ? -area_bottom : 0;
11028 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11030 cfa = gen_rtx_SET (frame_pointer,
11031 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11032 if (DISP_IN_RANGE (INTVAL (frame_off)))
11034 insn = gen_rtx_SET (frame_pointer,
11035 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11036 insn = emit_insn (insn);
11040 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11041 frame_off = force_const_mem (Pmode, frame_off);
11043 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11044 annotate_constant_pool_refs (&PATTERN (insn));
11046 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11047 RTX_FRAME_RELATED_P (insn) = 1;
11050 /* Restore call saved fprs. */
11054 if (cfun_save_high_fprs_p)
11056 next_offset = cfun_frame_layout.f8_offset;
11057 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11059 if (cfun_fpr_save_p (i))
11061 restore_fpr (frame_pointer,
11062 offset + next_offset, i);
11064 = alloc_reg_note (REG_CFA_RESTORE,
11065 gen_rtx_REG (DFmode, i), cfa_restores);
11074 next_offset = cfun_frame_layout.f4_offset;
11076 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11078 if (cfun_fpr_save_p (i))
11080 restore_fpr (frame_pointer,
11081 offset + next_offset, i);
11083 = alloc_reg_note (REG_CFA_RESTORE,
11084 gen_rtx_REG (DFmode, i), cfa_restores);
11087 else if (!TARGET_PACKED_STACK)
11093 /* Return register. */
11095 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11097 /* Restore call saved gprs. */
11099 if (cfun_frame_layout.first_restore_gpr != -1)
11104 /* Check for global register and save them
11105 to stack location from where they get restored. */
11107 for (i = cfun_frame_layout.first_restore_gpr;
11108 i <= cfun_frame_layout.last_restore_gpr;
11111 if (global_not_special_regno_p (i))
11113 addr = plus_constant (Pmode, frame_pointer,
11114 offset + cfun_frame_layout.gprs_offset
11115 + (i - cfun_frame_layout.first_save_gpr_slot)
11117 addr = gen_rtx_MEM (Pmode, addr);
11118 set_mem_alias_set (addr, get_frame_alias_set ());
11119 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11123 = alloc_reg_note (REG_CFA_RESTORE,
11124 gen_rtx_REG (Pmode, i), cfa_restores);
11129 /* Fetch return address from stack before load multiple,
11130 this is good for scheduling.
11132 Only do this if we already decided that r14 needs to be
11133 saved to a stack slot. (And not just because r14 happens to
11134 be in between two GPRs which need saving.) Otherwise it
11135 would be difficult to take that decision back in
11136 s390_optimize_prologue. */
11137 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
11139 int return_regnum = find_unused_clobbered_reg();
11140 if (!return_regnum)
11142 return_reg = gen_rtx_REG (Pmode, return_regnum);
11144 addr = plus_constant (Pmode, frame_pointer,
11145 offset + cfun_frame_layout.gprs_offset
11147 - cfun_frame_layout.first_save_gpr_slot)
11149 addr = gen_rtx_MEM (Pmode, addr);
11150 set_mem_alias_set (addr, get_frame_alias_set ());
11151 emit_move_insn (return_reg, addr);
11153 /* Once we have done this optimization we have to make sure
11154 s390_optimize_prologue does not try to remove the
11155 store of r14 since we will not be able to find the
11156 load issued here. */
11157 cfun_frame_layout.save_return_addr_p = true;
11161 insn = restore_gprs (frame_pointer,
11162 offset + cfun_frame_layout.gprs_offset
11163 + (cfun_frame_layout.first_restore_gpr
11164 - cfun_frame_layout.first_save_gpr_slot)
11166 cfun_frame_layout.first_restore_gpr,
11167 cfun_frame_layout.last_restore_gpr);
11168 insn = emit_insn (insn);
11169 REG_NOTES (insn) = cfa_restores;
11170 add_reg_note (insn, REG_CFA_DEF_CFA,
11171 plus_constant (Pmode, stack_pointer_rtx,
11172 STACK_POINTER_OFFSET));
11173 RTX_FRAME_RELATED_P (insn) = 1;
11176 s390_restore_gprs_from_fprs ();
11181 /* Return to caller. */
11183 p = rtvec_alloc (2);
11185 RTVEC_ELT (p, 0) = ret_rtx;
11186 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11187 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11191 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11194 s300_set_up_by_prologue (hard_reg_set_container *regs)
11196 if (cfun->machine->base_reg
11197 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11198 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11201 /* -fsplit-stack support. */
11203 /* A SYMBOL_REF for __morestack. */
11204 static GTY(()) rtx morestack_ref;
11206 /* When using -fsplit-stack, the allocation routines set a field in
11207 the TCB to the bottom of the stack plus this much space, measured
11208 in bytes. */
11210 #define SPLIT_STACK_AVAILABLE 1024
11212 /* Emit -fsplit-stack prologue, which goes before the regular function
11213 prologue. */
11216 s390_expand_split_stack_prologue (void)
11218 rtx r1, guard, cc = NULL;
11220 /* Offset from thread pointer to __private_ss. */
11221 int psso = TARGET_64BIT ? 0x38 : 0x20;
11222 /* Pointer size in bytes. */
11223 /* Frame size and argument size - the two parameters to __morestack. */
11224 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11225 /* Align argument size to 8 bytes - simplifies __morestack code. */
11226 HOST_WIDE_INT args_size = crtl->args.size >= 0
11227 ? ((crtl->args.size + 7) & ~7)
11228 : 0;
11229 /* Label to be called by __morestack. */
11230 rtx_code_label *call_done = NULL;
11231 rtx_code_label *parm_base = NULL;
11234 gcc_assert (flag_split_stack && reload_completed);
11235 if (!TARGET_CPU_ZARCH)
11237 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11241 r1 = gen_rtx_REG (Pmode, 1);
11243 /* If no stack frame will be allocated, don't do anything. */
11246 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11248 /* If va_start is used, just use r15. */
11249 emit_move_insn (r1,
11250 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11251 GEN_INT (STACK_POINTER_OFFSET)));
11257 if (morestack_ref == NULL_RTX)
11259 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11260 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11261 | SYMBOL_FLAG_FUNCTION);
11264 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11266 /* If frame_size will fit in an add instruction, do a stack space
11267 check, and only call __morestack if there's not enough space. */
11269 /* Get thread pointer. r1 is the only register we can always destroy - r0
11270 could contain a static chain (and cannot be used to address memory
11271 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11272 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11273 /* Aim at __private_ss. */
11274 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11276 /* If less than 1kiB used, skip addition and compare directly with
11277 the stack guard. */
11278 if (frame_size > SPLIT_STACK_AVAILABLE)
11280 emit_move_insn (r1, guard);
11282 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11284 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11288 /* Compare the (maybe adjusted) guard with the stack pointer. */
11289 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11292 call_done = gen_label_rtx ();
11293 parm_base = gen_label_rtx ();
11295 /* Emit the parameter block. */
11296 tmp = gen_split_stack_data (parm_base, call_done,
11297 GEN_INT (frame_size),
11298 GEN_INT (args_size));
11299 insn = emit_insn (tmp);
11300 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11301 LABEL_NUSES (call_done)++;
11302 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11303 LABEL_NUSES (parm_base)++;
11305 /* %r1 = litbase. */
11306 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11307 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11308 LABEL_NUSES (parm_base)++;
11310 /* Now, we need to call __morestack. It has very special calling
11311 conventions: it preserves param/return/static chain registers for
11312 calling the main function body, and looks for its own parameters at %r1. */
11316 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11318 insn = emit_jump_insn (tmp);
11319 JUMP_LABEL (insn) = call_done;
11320 LABEL_NUSES (call_done)++;
11322 /* Mark the jump as very unlikely to be taken. */
11323 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11325 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11327 /* If va_start is used, and __morestack was not called, just use
11328 r15. */
11329 emit_move_insn (r1,
11330 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11331 GEN_INT (STACK_POINTER_OFFSET)));
11336 tmp = gen_split_stack_call (morestack_ref, call_done);
11337 insn = emit_jump_insn (tmp);
11338 JUMP_LABEL (insn) = call_done;
11339 LABEL_NUSES (call_done)++;
11343 /* __morestack will call us here. */
11345 emit_label (call_done);
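/* For illustration, the fast path emitted above behaves, for a 64-bit
   function with a 4096-byte frame, roughly like:

     r1 = *(tp + 0x38);          // the TCB field __private_ss
     r1 += 4096;                 // frame_size > SPLIT_STACK_AVAILABLE
     if (sp < r1)                // not enough room on this segment
       __morestack (...);        // returns to the call_done label

   For frames of at most SPLIT_STACK_AVAILABLE bytes the addition is
   skipped and sp is compared against the guard directly.  */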
11348 /* We may have to tell the dataflow pass that the split stack prologue
11349 is initializing a register. */
11352 s390_live_on_entry (bitmap regs)
11354 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11356 gcc_assert (flag_split_stack);
11357 bitmap_set_bit (regs, 1);
11361 /* Return true if the function can use simple_return to return outside
11362 of a shrink-wrapped region. At present shrink-wrapping is supported
11363 in all cases. */
11366 s390_can_use_simple_return_insn (void)
11371 /* Return true if the epilogue is guaranteed to contain only a return
11372 instruction and if a direct return can therefore be used instead.
11373 One of the main advantages of using direct return instructions
11374 is that we can then use conditional returns. */
11377 s390_can_use_return_insn (void)
11381 if (!reload_completed)
11387 if (TARGET_TPF_PROFILING)
11390 for (i = 0; i < 16; i++)
11391 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11394 /* For 31 bit this is not covered by the frame_size check below
11395 since f4, f6 are saved in the register save area without needing
11396 additional stack space. */
11398 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11401 if (cfun->machine->base_reg
11402 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11405 return cfun_frame_layout.frame_size == 0;
11408 /* The VX ABI differs for vararg functions. Therefore we need the
11409 prototype of the callee to be available when passing vector type
11410 values. */
11411 static const char *
11412 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11414 return ((TARGET_VX_ABI
11416 && VECTOR_TYPE_P (TREE_TYPE (val))
11417 && (funcdecl == NULL_TREE
11418 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11419 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11420 ? N_("Vector argument passed to unprototyped function")
11425 /* Return the size in bytes of a function argument of
11426 type TYPE and/or mode MODE. At least one of TYPE or
11427 MODE must be specified. */
11430 s390_function_arg_size (machine_mode mode, const_tree type)
11433 return int_size_in_bytes (type);
11435 /* No type info available for some library calls ... */
11436 if (mode != BLKmode)
11437 return GET_MODE_SIZE (mode);
11439 /* If we have neither type nor mode, abort */
11440 gcc_unreachable ();
11443 /* Return true if a function argument of type TYPE and mode MODE
11444 is to be passed in a vector register, if available. */
11447 s390_function_arg_vector (machine_mode mode, const_tree type)
11449 if (!TARGET_VX_ABI)
11452 if (s390_function_arg_size (mode, type) > 16)
11455 /* No type info available for some library calls ... */
11457 return VECTOR_MODE_P (mode);
11459 /* The ABI says that record types with a single member are treated
11460 just like that member would be. */
11461 while (TREE_CODE (type) == RECORD_TYPE)
11463 tree field, single = NULL_TREE;
11465 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11467 if (TREE_CODE (field) != FIELD_DECL)
11470 if (single == NULL_TREE)
11471 single = TREE_TYPE (field);
11476 if (single == NULL_TREE)
11480 /* If the field declaration adds extra bytes due to
11481 e.g. padding, this is not accepted as a vector type. */
11482 if (int_size_in_bytes (single) <= 0
11483 || int_size_in_bytes (single) != int_size_in_bytes (type))
11489 return VECTOR_TYPE_P (type);
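/* For illustration, the single-member unwrapping above means that with
   the VX ABI both of these are passed alike in a vector register:

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrapped { v4si x; };     // treated just like a plain v4si

   whereas a record with a second member, or one whose size exceeds
   that of its member, is not treated as a vector.  */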
11492 /* Return true if a function argument of type TYPE and mode MODE
11493 is to be passed in a floating-point register, if available. */
11496 s390_function_arg_float (machine_mode mode, const_tree type)
11498 if (s390_function_arg_size (mode, type) > 8)
11501 /* Soft-float changes the ABI: no floating-point registers are used. */
11502 if (TARGET_SOFT_FLOAT)
11505 /* No type info available for some library calls ... */
11507 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11509 /* The ABI says that record types with a single member are treated
11510 just like that member would be. */
11511 while (TREE_CODE (type) == RECORD_TYPE)
11513 tree field, single = NULL_TREE;
11515 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11517 if (TREE_CODE (field) != FIELD_DECL)
11520 if (single == NULL_TREE)
11521 single = TREE_TYPE (field);
11526 if (single == NULL_TREE)
11532 return TREE_CODE (type) == REAL_TYPE;
11535 /* Return true if a function argument of type TYPE and mode MODE
11536 is to be passed in an integer register, or a pair of integer
11537 registers, if available. */
11540 s390_function_arg_integer (machine_mode mode, const_tree type)
11542 int size = s390_function_arg_size (mode, type);
11546 /* No type info available for some library calls ... */
11548 return GET_MODE_CLASS (mode) == MODE_INT
11549 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11551 /* We accept small integral (and similar) types. */
11552 if (INTEGRAL_TYPE_P (type)
11553 || POINTER_TYPE_P (type)
11554 || TREE_CODE (type) == NULLPTR_TYPE
11555 || TREE_CODE (type) == OFFSET_TYPE
11556 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11559 /* We also accept structs of size 1, 2, 4, 8 that are not
11560 passed in floating-point registers. */
11561 if (AGGREGATE_TYPE_P (type)
11562 && exact_log2 (size) >= 0
11563 && !s390_function_arg_float (mode, type))
11569 /* Return 1 if a function argument of type TYPE and mode MODE
11570 is to be passed by reference. The ABI specifies that only
11571 structures of size 1, 2, 4, or 8 bytes are passed by value,
11572 all other structures (and complex numbers) are passed by
11576 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11577 machine_mode mode, const_tree type,
11578 bool named ATTRIBUTE_UNUSED)
11580 int size = s390_function_arg_size (mode, type);
11582 if (s390_function_arg_vector (mode, type))
11590 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11593 if (TREE_CODE (type) == COMPLEX_TYPE
11594 || TREE_CODE (type) == VECTOR_TYPE)
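/* Editor's illustration of the rules above (hypothetical declarations):

       struct { char c[3]; } s3;  // size 3, not a power of two -> by reference
       struct { int i; }     s4;  // size 4 -> passed by value in a GPR
       _Complex double       z;   // complex values always go by reference
*/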
11601 /* Update the data in CUM to advance over an argument of mode MODE and
11602 data type TYPE. (TYPE is null for libcalls where that information
11603 may not be available). The boolean NAMED specifies whether the
11604 argument is a named argument (as opposed to an unnamed argument
11605 matching an ellipsis). */
11608 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11609 const_tree type, bool named)
11611 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11613 if (s390_function_arg_vector (mode, type))
11615 /* We are called for unnamed vector stdarg arguments which are
11616 passed on the stack. In this case this hook does not have to
11617 do anything since stack arguments are tracked by common
11623 else if (s390_function_arg_float (mode, type))
11627 else if (s390_function_arg_integer (mode, type))
11629 int size = s390_function_arg_size (mode, type);
11630 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11633 gcc_unreachable ();
11636 /* Define where to put the arguments to a function.
11637 Value is zero to push the argument on the stack,
11638 or a hard register in which to store the argument.
11640 MODE is the argument's machine mode.
11641 TYPE is the data type of the argument (as a tree).
11642 This is null for libcalls where that information may
11644 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11645 the preceding args and about the function being called.
11646 NAMED is nonzero if this argument is a named parameter
11647 (otherwise it is an extra parameter matching an ellipsis).
11649 On S/390, we use general purpose registers 2 through 6 to
11650 pass integer, pointer, and certain structure arguments, and
11651 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11652 to pass floating point arguments. All remaining arguments
11653 are pushed to the stack. */
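/* Editor's illustration: for a 64-bit call such as

       void f (int a, double d, long b, double e, long c);

   the rules above place a in %r2, d in %f0, b in %r3, e in %f2 and
   c in %r4; the integer and floating-point argument registers are
   consumed independently.  Hypothetical prototype, shown only to make
   the register assignment concrete.  */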
11656 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11657 const_tree type, bool named)
11659 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11662 s390_check_type_for_vector_abi (type, true, false);
11664 if (s390_function_arg_vector (mode, type))
11666 /* Vector arguments being part of the ellipsis are passed on the
11667 stack. */
11668 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11671 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11673 else if (s390_function_arg_float (mode, type))
11675 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11678 return gen_rtx_REG (mode, cum->fprs + 16);
11680 else if (s390_function_arg_integer (mode, type))
11682 int size = s390_function_arg_size (mode, type);
11683 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11685 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11687 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11688 return gen_rtx_REG (mode, cum->gprs + 2);
11689 else if (n_gprs == 2)
11691 rtvec p = rtvec_alloc (2);
11694 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11697 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11700 return gen_rtx_PARALLEL (mode, p);
11704 /* After the real arguments, expand_call calls us once again
11705 with a void_type_node type. Whatever we return here is
11706 passed as operand 2 to the call expanders.
11708 We don't need this feature ... */
11709 else if (type == void_type_node)
11712 gcc_unreachable ();
11715 /* Return true if return values of type TYPE should be returned
11716 in a memory buffer whose address is passed by the caller as
11717 hidden first argument. */
11720 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11722 /* We accept small integral (and similar) types. */
11723 if (INTEGRAL_TYPE_P (type)
11724 || POINTER_TYPE_P (type)
11725 || TREE_CODE (type) == OFFSET_TYPE
11726 || TREE_CODE (type) == REAL_TYPE)
11727 return int_size_in_bytes (type) > 8;
11729 /* vector types which fit into a VR. */
11731 && VECTOR_TYPE_P (type)
11732 && int_size_in_bytes (type) <= 16)
11735 /* Aggregates and similar constructs are always returned
11737 if (AGGREGATE_TYPE_P (type)
11738 || TREE_CODE (type) == COMPLEX_TYPE
11739 || VECTOR_TYPE_P (type))
11742 /* ??? We get called on all sorts of random stuff from
11743 aggregate_value_p. We can't abort, but it's not clear
11744 what's safe to return. Pretend it's a struct I guess. */
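/* Editor's illustration of the return rules above (hypothetical
   signatures; v4si being a 16 byte vector type):

       long f1 (void);                  // returned in %r2
       struct { int a, b; } f2 (void);  // aggregate -> hidden result pointer
       v4si f3 (void);                  // with the VX ABI: returned in %v24
*/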
11748 /* Function arguments and return values are promoted to word size. */
11750 static machine_mode
11751 s390_promote_function_mode (const_tree type, machine_mode mode,
11753 const_tree fntype ATTRIBUTE_UNUSED,
11754 int for_return ATTRIBUTE_UNUSED)
11756 if (INTEGRAL_MODE_P (mode)
11757 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11759 if (type != NULL_TREE && POINTER_TYPE_P (type))
11760 *punsignedp = POINTERS_EXTEND_UNSIGNED;
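/* Editor's illustration: on a 64-bit target a function such as

       short g (short x);   // hypothetical

   receives and returns x widened to a full 64-bit register, and any
   pointer argument is zero-extended as requested by
   POINTERS_EXTEND_UNSIGNED above.  */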
11767 /* Define where to return a (scalar) value of type RET_TYPE.
11768 If RET_TYPE is null, define where to return a (scalar)
11769 value of mode MODE from a libcall. */
11772 s390_function_and_libcall_value (machine_mode mode,
11773 const_tree ret_type,
11774 const_tree fntype_or_decl,
11775 bool outgoing ATTRIBUTE_UNUSED)
11777 /* For vector return types it is important to use the RET_TYPE
11778 argument whenever available since the middle-end might have
11779 changed the mode to a scalar mode. */
11780 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11781 || (!ret_type && VECTOR_MODE_P (mode)));
11783 /* For normal functions perform the promotion as
11784 promote_function_mode would do. */
11787 int unsignedp = TYPE_UNSIGNED (ret_type);
11788 mode = promote_function_mode (ret_type, mode, &unsignedp,
11789 fntype_or_decl, 1);
11792 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11793 || SCALAR_FLOAT_MODE_P (mode)
11794 || (TARGET_VX_ABI && vector_ret_type_p));
11795 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11797 if (TARGET_VX_ABI && vector_ret_type_p)
11798 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11799 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11800 return gen_rtx_REG (mode, 16);
11801 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11802 || UNITS_PER_LONG == UNITS_PER_WORD)
11803 return gen_rtx_REG (mode, 2);
11804 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11806 /* This case is triggered when returning a 64 bit value with
11807 -m31 -mzarch. Although the value would fit into a single
11808 register it has to be forced into a 32 bit register pair in
11809 order to match the ABI. */
11810 rtvec p = rtvec_alloc (2);
11813 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11815 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11817 return gen_rtx_PARALLEL (mode, p);
11820 gcc_unreachable ();
11823 /* Define where to return a scalar return value of type RET_TYPE. */
11826 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11829 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11830 fn_decl_or_type, outgoing);
11833 /* Define where to return a scalar libcall return value of mode
11837 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11839 return s390_function_and_libcall_value (mode, NULL_TREE,
11844 /* Create and return the va_list datatype.
11846 On S/390, va_list is an array type equivalent to
11848 typedef struct __va_list_tag
11852 void *__overflow_arg_area;
11853 void *__reg_save_area;
11856 where __gpr and __fpr hold the number of general purpose
11857 or floating point arguments used up to now, respectively,
11858 __overflow_arg_area points to the stack location of the
11859 next argument passed on the stack, and __reg_save_area
11860 always points to the start of the register area in the
11861 call frame of the current function. The function prologue
11862 saves all registers used for argument passing into this
11863 area if the function uses variable arguments. */
11866 s390_build_builtin_va_list (void)
11868 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11870 record = lang_hooks.types.make_type (RECORD_TYPE);
11873 build_decl (BUILTINS_LOCATION,
11874 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11876 f_gpr = build_decl (BUILTINS_LOCATION,
11877 FIELD_DECL, get_identifier ("__gpr"),
11878 long_integer_type_node);
11879 f_fpr = build_decl (BUILTINS_LOCATION,
11880 FIELD_DECL, get_identifier ("__fpr"),
11881 long_integer_type_node);
11882 f_ovf = build_decl (BUILTINS_LOCATION,
11883 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11884 ptr_type_node);
11885 f_sav = build_decl (BUILTINS_LOCATION,
11886 FIELD_DECL, get_identifier ("__reg_save_area"),
11887 ptr_type_node);
11889 va_list_gpr_counter_field = f_gpr;
11890 va_list_fpr_counter_field = f_fpr;
11892 DECL_FIELD_CONTEXT (f_gpr) = record;
11893 DECL_FIELD_CONTEXT (f_fpr) = record;
11894 DECL_FIELD_CONTEXT (f_ovf) = record;
11895 DECL_FIELD_CONTEXT (f_sav) = record;
11897 TYPE_STUB_DECL (record) = type_decl;
11898 TYPE_NAME (record) = type_decl;
11899 TYPE_FIELDS (record) = f_gpr;
11900 DECL_CHAIN (f_gpr) = f_fpr;
11901 DECL_CHAIN (f_fpr) = f_ovf;
11902 DECL_CHAIN (f_ovf) = f_sav;
11904 layout_type (record);
11906 /* The correct type is an array type of one element. */
11907 return build_array_type (record, build_index_type (size_zero_node));
11910 /* Implement va_start by filling the va_list structure VALIST.
11911 STDARG_P is always true, and ignored.
11912 NEXTARG points to the first anonymous stack argument.
11914 The following global variables are used to initialize
11915 the va_list structure:
11918 holds number of gprs and fprs used for named arguments.
11919 crtl->args.arg_offset_rtx:
11920 holds the offset of the first anonymous stack argument
11921 (relative to the virtual arg pointer). */
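/* Editor's illustration (hypothetical prototype): for

       void log_ints (const char *fmt, ...);

   compiled for 64 bit, one GPR is consumed by the named fmt argument,
   so va_start stores __gpr = 1 and __fpr = 0, points
   __overflow_arg_area at the first stack-passed vararg, and sets
   __reg_save_area to the register save area of the current frame.  */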
11924 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11926 HOST_WIDE_INT n_gpr, n_fpr;
11928 tree f_gpr, f_fpr, f_ovf, f_sav;
11929 tree gpr, fpr, ovf, sav, t;
11931 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11932 f_fpr = DECL_CHAIN (f_gpr);
11933 f_ovf = DECL_CHAIN (f_fpr);
11934 f_sav = DECL_CHAIN (f_ovf);
11936 valist = build_simple_mem_ref (valist);
11937 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11938 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11939 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11940 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11942 /* Count number of gp and fp argument registers used. */
11944 n_gpr = crtl->args.info.gprs;
11945 n_fpr = crtl->args.info.fprs;
11947 if (cfun->va_list_gpr_size)
11949 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11950 build_int_cst (NULL_TREE, n_gpr));
11951 TREE_SIDE_EFFECTS (t) = 1;
11952 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11955 if (cfun->va_list_fpr_size)
11957 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11958 build_int_cst (NULL_TREE, n_fpr));
11959 TREE_SIDE_EFFECTS (t) = 1;
11960 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11963 if (flag_split_stack
11964 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
11966 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11971 reg = gen_reg_rtx (Pmode);
11972 cfun->machine->split_stack_varargs_pointer = reg;
11975 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
11976 seq = get_insns ();
11979 push_topmost_sequence ();
11980 emit_insn_after (seq, entry_of_function ());
11981 pop_topmost_sequence ();
11984 /* Find the overflow area.
11985 FIXME: This currently is too pessimistic when the vector ABI is
11986 enabled. In that case we *always* set up the overflow area
11987 starting from the first unnamed argument. */
11988 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11989 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11992 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11993 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11995 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
11997 off = INTVAL (crtl->args.arg_offset_rtx);
11998 off = off < 0 ? 0 : off;
11999 if (TARGET_DEBUG_ARG)
12000 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d, off = %d\n",
12001 (int)n_gpr, (int)n_fpr, off);
12003 t = fold_build_pointer_plus_hwi (t, off);
12005 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12006 TREE_SIDE_EFFECTS (t) = 1;
12007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12010 /* Find the register save area. */
12011 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12012 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12014 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12015 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12017 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12018 TREE_SIDE_EFFECTS (t) = 1;
12019 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12023 /* Implement va_arg by updating the va_list structure
12024 VALIST as required to retrieve an argument of type
12025 TYPE, and returning that argument.
12027 Generates code equivalent to:
12029 if (integral value) {
12030 if (size <= 4 && args.gpr < 5 ||
12031 size > 4 && args.gpr < 4 )
12032 ret = args.reg_save_area[args.gpr+8]
12034 ret = *args.overflow_arg_area++;
12035 } else if (vector value) {
12036 ret = *args.overflow_arg_area;
12037 args.overflow_arg_area += size / 8;
12038 } else if (float value) {
12040 ret = args.reg_save_area[args.fpr+64]
12042 ret = *args.overflow_arg_area++;
12043 } else if (aggregate value) {
12045 ret = *args.reg_save_area[args.gpr]
12047 ret = **args.overflow_arg_area++;
12051 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12052 gimple_seq *post_p ATTRIBUTE_UNUSED)
12054 tree f_gpr, f_fpr, f_ovf, f_sav;
12055 tree gpr, fpr, ovf, sav, reg, t, u;
12056 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12057 tree lab_false, lab_over;
12058 tree addr = create_tmp_var (ptr_type_node, "addr");
12059 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12062 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12063 f_fpr = DECL_CHAIN (f_gpr);
12064 f_ovf = DECL_CHAIN (f_fpr);
12065 f_sav = DECL_CHAIN (f_ovf);
12067 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12068 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12069 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12071 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12072 both appear on a lhs. */
12073 valist = unshare_expr (valist);
12074 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12076 size = int_size_in_bytes (type);
12078 s390_check_type_for_vector_abi (type, true, false);
12080 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12082 if (TARGET_DEBUG_ARG)
12084 fprintf (stderr, "va_arg: aggregate type");
12088 /* Aggregates are passed by reference. */
12093 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12094 will be added by s390_frame_info because for va_args an even number
12095 of gprs always has to be saved (r15-r2 = 14 regs). */
12096 sav_ofs = 2 * UNITS_PER_LONG;
12097 sav_scale = UNITS_PER_LONG;
12098 size = UNITS_PER_LONG;
12099 max_reg = GP_ARG_NUM_REG - n_reg;
12100 left_align_p = false;
12102 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12104 if (TARGET_DEBUG_ARG)
12106 fprintf (stderr, "va_arg: vector type");
12116 left_align_p = true;
12118 else if (s390_function_arg_float (TYPE_MODE (type), type))
12120 if (TARGET_DEBUG_ARG)
12122 fprintf (stderr, "va_arg: float type");
12126 /* FP args go in FP registers, if present. */
12130 sav_ofs = 16 * UNITS_PER_LONG;
12132 max_reg = FP_ARG_NUM_REG - n_reg;
12133 left_align_p = false;
12137 if (TARGET_DEBUG_ARG)
12139 fprintf (stderr, "va_arg: other type");
12143 /* Otherwise into GP registers. */
12146 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12148 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12149 will be added by s390_frame_info because for va_args an even number
12150 of gprs always has to be saved (r15-r2 = 14 regs). */
12151 sav_ofs = 2 * UNITS_PER_LONG;
12153 if (size < UNITS_PER_LONG)
12154 sav_ofs += UNITS_PER_LONG - size;
12156 sav_scale = UNITS_PER_LONG;
12157 max_reg = GP_ARG_NUM_REG - n_reg;
12158 left_align_p = false;
12161 /* Pull the value out of the saved registers ... */
12163 if (reg != NULL_TREE)
12166 if (reg > ((typeof (reg))max_reg))
12169 addr = sav + sav_ofs + reg * sav_scale;
12176 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12177 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12179 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12180 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12181 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12182 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12183 gimplify_and_add (t, pre_p);
12185 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12186 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12187 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12188 t = fold_build_pointer_plus (t, u);
12190 gimplify_assign (addr, t, pre_p);
12192 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12194 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12197 /* ... Otherwise out of the overflow area. */
12200 if (size < UNITS_PER_LONG && !left_align_p)
12201 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12203 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12205 gimplify_assign (addr, t, pre_p);
12207 if (size < UNITS_PER_LONG && left_align_p)
12208 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12210 t = fold_build_pointer_plus_hwi (t, size);
12212 gimplify_assign (ovf, t, pre_p);
12214 if (reg != NULL_TREE)
12215 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12218 /* Increment register save count. */
12222 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12223 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12224 gimplify_and_add (u, pre_p);
12229 t = build_pointer_type_for_mode (build_pointer_type (type),
12231 addr = fold_convert (t, addr);
12232 addr = build_va_arg_indirect_ref (addr);
12236 t = build_pointer_type_for_mode (type, ptr_mode, true);
12237 addr = fold_convert (t, addr);
12240 return build_va_arg_indirect_ref (addr);
12243 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12245 DEST - Register location where CC will be stored.
12246 TDB - Pointer to a 256 byte area where to store the transaction
12247 diagnostic block. NULL if TDB is not needed.
12248 RETRY - Retry count value. If non-NULL a retry loop for CC2
12249 is generated.
12250 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12251 of the tbegin instruction pattern. */
12254 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12256 rtx retry_plus_two = gen_reg_rtx (SImode);
12257 rtx retry_reg = gen_reg_rtx (SImode);
12258 rtx_code_label *retry_label = NULL;
12260 if (retry != NULL_RTX)
12262 emit_move_insn (retry_reg, retry);
12263 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12264 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12265 retry_label = gen_label_rtx ();
12266 emit_label (retry_label);
12269 if (clobber_fprs_p)
12272 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12275 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12279 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12282 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12283 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12285 UNSPEC_CC_TO_INT));
12286 if (retry != NULL_RTX)
12288 const int CC0 = 1 << 3;
12289 const int CC1 = 1 << 2;
12290 const int CC3 = 1 << 0;
12292 rtx count = gen_reg_rtx (SImode);
12293 rtx_code_label *leave_label = gen_label_rtx ();
12295 /* Exit for success and permanent failures. */
12296 jump = s390_emit_jump (leave_label,
12297 gen_rtx_EQ (VOIDmode,
12298 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12299 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12300 LABEL_NUSES (leave_label) = 1;
12302 /* CC2 - transient failure. Perform retry with ppa. */
12303 emit_move_insn (count, retry_plus_two);
12304 emit_insn (gen_subsi3 (count, count, retry_reg));
12305 emit_insn (gen_tx_assist (count));
12306 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12309 JUMP_LABEL (jump) = retry_label;
12310 LABEL_NUSES (retry_label) = 1;
12311 emit_label (leave_label);
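/* Editor's note -- user code typically reaches this expander through
   the HTM builtins (requires -mhtm; _HTM_TBEGIN_STARTED comes from
   htmintrin.h):

       #include <htmintrin.h>
       if (__builtin_tbegin ((void *) 0) == _HTM_TBEGIN_STARTED)
         {
           // transactional work
           __builtin_tend ();
         }

   Any other condition code reflects an aborted or retried transaction,
   matching the CC0/CC1/CC2/CC3 handling above.  */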
12316 /* Return the decl for the target specific builtin with the function
12320 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12322 if (fcode >= S390_BUILTIN_MAX)
12323 return error_mark_node;
12325 return s390_builtin_decls[fcode];
12328 /* We call mcount before the function prologue. So a profiled leaf
12329 function should stay a leaf function. */
12332 s390_keep_leaf_when_profiled ()
12337 /* Output assembly code for the trampoline template to
12340 On S/390, we use gpr 1 internally in the trampoline code;
12341 gpr 0 is used to hold the static chain. */
12344 s390_asm_trampoline_template (FILE *file)
12347 op[0] = gen_rtx_REG (Pmode, 0);
12348 op[1] = gen_rtx_REG (Pmode, 1);
12352 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12353 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12354 output_asm_insn ("br\t%1", op); /* 2 byte */
12355 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12359 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12360 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12361 output_asm_insn ("br\t%1", op); /* 2 byte */
12362 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12366 /* Emit RTL insns to initialize the variable parts of a trampoline.
12367 FNADDR is an RTX for the address of the function's pure code.
12368 CXT is an RTX for the static chain value for the function. */
12371 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12373 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12376 emit_block_move (m_tramp, assemble_trampoline_template (),
12377 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12379 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12380 emit_move_insn (mem, cxt);
12381 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12382 emit_move_insn (mem, fnaddr);
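/* Editor's sketch of the resulting 64-bit trampoline, assuming
   UNITS_PER_LONG == 8:

       0:  basr %r1,0            # %r1 = address of offset 2
       2:  lmg  %r0,%r1,14(%r1)  # %r0 = chain (at 16), %r1 = target (at 24)
       8:  br   %r1
       10: <padding up to 16>
       16: static chain value    # stored by the code above
       24: function address      # stored by the code above

   The displacement 14(%r1) resolves to offset 16 because %r1 holds the
   address of the instruction following basr.  */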
12385 /* Output assembler code to FILE to increment profiler label # LABELNO
12386 for profiling a function entry. */
12389 s390_function_profiler (FILE *file, int labelno)
12394 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12396 fprintf (file, "# function profiler \n");
12398 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12399 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12400 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12402 op[2] = gen_rtx_REG (Pmode, 1);
12403 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12404 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12406 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12409 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12410 op[4] = gen_rtx_CONST (Pmode, op[4]);
12415 output_asm_insn ("stg\t%0,%1", op);
12416 output_asm_insn ("larl\t%2,%3", op);
12417 output_asm_insn ("brasl\t%0,%4", op);
12418 output_asm_insn ("lg\t%0,%1", op);
12420 else if (TARGET_CPU_ZARCH)
12422 output_asm_insn ("st\t%0,%1", op);
12423 output_asm_insn ("larl\t%2,%3", op);
12424 output_asm_insn ("brasl\t%0,%4", op);
12425 output_asm_insn ("l\t%0,%1", op);
12427 else if (!flag_pic)
12429 op[6] = gen_label_rtx ();
12431 output_asm_insn ("st\t%0,%1", op);
12432 output_asm_insn ("bras\t%2,%l6", op);
12433 output_asm_insn (".long\t%4", op);
12434 output_asm_insn (".long\t%3", op);
12435 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12436 output_asm_insn ("l\t%0,0(%2)", op);
12437 output_asm_insn ("l\t%2,4(%2)", op);
12438 output_asm_insn ("basr\t%0,%0", op);
12439 output_asm_insn ("l\t%0,%1", op);
12443 op[5] = gen_label_rtx ();
12444 op[6] = gen_label_rtx ();
12446 output_asm_insn ("st\t%0,%1", op);
12447 output_asm_insn ("bras\t%2,%l6", op);
12448 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12449 output_asm_insn (".long\t%4-%l5", op);
12450 output_asm_insn (".long\t%3-%l5", op);
12451 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12452 output_asm_insn ("lr\t%0,%2", op);
12453 output_asm_insn ("a\t%0,0(%2)", op);
12454 output_asm_insn ("a\t%2,4(%2)", op);
12455 output_asm_insn ("basr\t%0,%0", op);
12456 output_asm_insn ("l\t%0,%1", op);
12460 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12461 into its SYMBOL_REF_FLAGS. */
12464 s390_encode_section_info (tree decl, rtx rtl, int first)
12466 default_encode_section_info (decl, rtl, first);
12468 if (TREE_CODE (decl) == VAR_DECL)
12470 /* Store the alignment to be able to check if we can use
12471 a larl/load-relative instruction. We only handle the cases
12472 that can go wrong (i.e. no FUNC_DECLs). */
12473 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12474 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12475 else if (DECL_ALIGN (decl) % 32)
12476 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12477 else if (DECL_ALIGN (decl) % 64)
12478 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12481 /* Literal pool references don't have a decl so they are handled
12482 differently here. We rely on the information in the MEM_ALIGN
12483 entry to decide upon the alignment. */
12485 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12486 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12488 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12489 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12490 else if (MEM_ALIGN (rtl) % 32)
12491 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12492 else if (MEM_ALIGN (rtl) % 64)
12493 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
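/* Editor's note: larl can only address even (2 byte aligned) symbols,
   and the load/store-relative instructions additionally need their
   operand's natural alignment, hence the NOTALIGN2/4/8 flags.  A
   hypothetical

       char buf[5] __attribute__ ((aligned (1)));

   gets SYMBOL_FLAG_NOTALIGN2 set here and is never accessed via
   larl.  */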
12497 /* Output thunk to FILE that implements a C++ virtual function call (with
12498 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12499 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12500 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12501 relative to the resulting this pointer. */
12504 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12505 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12511 /* Make sure unwind info is emitted for the thunk if needed. */
12512 final_start_function (emit_barrier (), file, 1);
12514 /* Operand 0 is the target function. */
12515 op[0] = XEXP (DECL_RTL (function), 0);
12516 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12519 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12520 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12521 op[0] = gen_rtx_CONST (Pmode, op[0]);
12524 /* Operand 1 is the 'this' pointer. */
12525 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12526 op[1] = gen_rtx_REG (Pmode, 3);
12528 op[1] = gen_rtx_REG (Pmode, 2);
12530 /* Operand 2 is the delta. */
12531 op[2] = GEN_INT (delta);
12533 /* Operand 3 is the vcall_offset. */
12534 op[3] = GEN_INT (vcall_offset);
12536 /* Operand 4 is the temporary register. */
12537 op[4] = gen_rtx_REG (Pmode, 1);
12539 /* Operands 5 to 8 can be used as labels. */
12545 /* Operand 9 can be used for temporary register. */
12548 /* Generate code. */
12551 /* Setup literal pool pointer if required. */
12552 if ((!DISP_IN_RANGE (delta)
12553 && !CONST_OK_FOR_K (delta)
12554 && !CONST_OK_FOR_Os (delta))
12555 || (!DISP_IN_RANGE (vcall_offset)
12556 && !CONST_OK_FOR_K (vcall_offset)
12557 && !CONST_OK_FOR_Os (vcall_offset)))
12559 op[5] = gen_label_rtx ();
12560 output_asm_insn ("larl\t%4,%5", op);
12563 /* Add DELTA to this pointer. */
12566 if (CONST_OK_FOR_J (delta))
12567 output_asm_insn ("la\t%1,%2(%1)", op);
12568 else if (DISP_IN_RANGE (delta))
12569 output_asm_insn ("lay\t%1,%2(%1)", op);
12570 else if (CONST_OK_FOR_K (delta))
12571 output_asm_insn ("aghi\t%1,%2", op);
12572 else if (CONST_OK_FOR_Os (delta))
12573 output_asm_insn ("agfi\t%1,%2", op);
12576 op[6] = gen_label_rtx ();
12577 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12581 /* Perform vcall adjustment. */
12584 if (DISP_IN_RANGE (vcall_offset))
12586 output_asm_insn ("lg\t%4,0(%1)", op);
12587 output_asm_insn ("ag\t%1,%3(%4)", op);
12589 else if (CONST_OK_FOR_K (vcall_offset))
12591 output_asm_insn ("lghi\t%4,%3", op);
12592 output_asm_insn ("ag\t%4,0(%1)", op);
12593 output_asm_insn ("ag\t%1,0(%4)", op);
12595 else if (CONST_OK_FOR_Os (vcall_offset))
12597 output_asm_insn ("lgfi\t%4,%3", op);
12598 output_asm_insn ("ag\t%4,0(%1)", op);
12599 output_asm_insn ("ag\t%1,0(%4)", op);
12603 op[7] = gen_label_rtx ();
12604 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12605 output_asm_insn ("ag\t%4,0(%1)", op);
12606 output_asm_insn ("ag\t%1,0(%4)", op);
12610 /* Jump to target. */
12611 output_asm_insn ("jg\t%0", op);
12613 /* Output literal pool if required. */
12616 output_asm_insn (".align\t4", op);
12617 targetm.asm_out.internal_label (file, "L",
12618 CODE_LABEL_NUMBER (op[5]));
12622 targetm.asm_out.internal_label (file, "L",
12623 CODE_LABEL_NUMBER (op[6]));
12624 output_asm_insn (".long\t%2", op);
12628 targetm.asm_out.internal_label (file, "L",
12629 CODE_LABEL_NUMBER (op[7]));
12630 output_asm_insn (".long\t%3", op);
12635 /* Setup base pointer if required. */
12637 || (!DISP_IN_RANGE (delta)
12638 && !CONST_OK_FOR_K (delta)
12639 && !CONST_OK_FOR_Os (delta))
12640 || (!DISP_IN_RANGE (delta)
12641 && !CONST_OK_FOR_K (vcall_offset)
12642 && !CONST_OK_FOR_Os (vcall_offset)))
12644 op[5] = gen_label_rtx ();
12645 output_asm_insn ("basr\t%4,0", op);
12646 targetm.asm_out.internal_label (file, "L",
12647 CODE_LABEL_NUMBER (op[5]));
12650 /* Add DELTA to this pointer. */
12653 if (CONST_OK_FOR_J (delta))
12654 output_asm_insn ("la\t%1,%2(%1)", op);
12655 else if (DISP_IN_RANGE (delta))
12656 output_asm_insn ("lay\t%1,%2(%1)", op);
12657 else if (CONST_OK_FOR_K (delta))
12658 output_asm_insn ("ahi\t%1,%2", op);
12659 else if (CONST_OK_FOR_Os (delta))
12660 output_asm_insn ("afi\t%1,%2", op);
12663 op[6] = gen_label_rtx ();
12664 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12668 /* Perform vcall adjustment. */
12671 if (CONST_OK_FOR_J (vcall_offset))
12673 output_asm_insn ("l\t%4,0(%1)", op);
12674 output_asm_insn ("a\t%1,%3(%4)", op);
12676 else if (DISP_IN_RANGE (vcall_offset))
12678 output_asm_insn ("l\t%4,0(%1)", op);
12679 output_asm_insn ("ay\t%1,%3(%4)", op);
12681 else if (CONST_OK_FOR_K (vcall_offset))
12683 output_asm_insn ("lhi\t%4,%3", op);
12684 output_asm_insn ("a\t%4,0(%1)", op);
12685 output_asm_insn ("a\t%1,0(%4)", op);
12687 else if (CONST_OK_FOR_Os (vcall_offset))
12689 output_asm_insn ("iilf\t%4,%3", op);
12690 output_asm_insn ("a\t%4,0(%1)", op);
12691 output_asm_insn ("a\t%1,0(%4)", op);
12695 op[7] = gen_label_rtx ();
12696 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12697 output_asm_insn ("a\t%4,0(%1)", op);
12698 output_asm_insn ("a\t%1,0(%4)", op);
12701 /* We had to clobber the base pointer register.
12702 Re-setup the base pointer (with a different base). */
12703 op[5] = gen_label_rtx ();
12704 output_asm_insn ("basr\t%4,0", op);
12705 targetm.asm_out.internal_label (file, "L",
12706 CODE_LABEL_NUMBER (op[5]));
12709 /* Jump to target. */
12710 op[8] = gen_label_rtx ();
12713 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12714 else if (!nonlocal)
12715 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12716 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12717 else if (flag_pic == 1)
12719 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12720 output_asm_insn ("l\t%4,%0(%4)", op);
12722 else if (flag_pic == 2)
12724 op[9] = gen_rtx_REG (Pmode, 0);
12725 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12726 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12727 output_asm_insn ("ar\t%4,%9", op);
12728 output_asm_insn ("l\t%4,0(%4)", op);
12731 output_asm_insn ("br\t%4", op);
12733 /* Output literal pool. */
12734 output_asm_insn (".align\t4", op);
12736 if (nonlocal && flag_pic == 2)
12737 output_asm_insn (".long\t%0", op);
12740 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12741 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12744 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12746 output_asm_insn (".long\t%0", op);
12748 output_asm_insn (".long\t%0-%5", op);
12752 targetm.asm_out.internal_label (file, "L",
12753 CODE_LABEL_NUMBER (op[6]));
12754 output_asm_insn (".long\t%2", op);
12758 targetm.asm_out.internal_label (file, "L",
12759 CODE_LABEL_NUMBER (op[7]));
12760 output_asm_insn (".long\t%3", op);
12763 final_end_function ();
12767 s390_valid_pointer_mode (machine_mode mode)
12769 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12772 /* Checks whether the given CALL_EXPR would use a caller
12773 saved register. This is used to decide whether sibling call
12774 optimization could be performed on the respective function
12778 s390_call_saved_register_used (tree call_expr)
12780 CUMULATIVE_ARGS cum_v;
12781 cumulative_args_t cum;
12788 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12789 cum = pack_cumulative_args (&cum_v);
12791 for (i = 0; i < call_expr_nargs (call_expr); i++)
12793 parameter = CALL_EXPR_ARG (call_expr, i);
12794 gcc_assert (parameter);
12796 /* For an undeclared variable passed as parameter we will get
12797 an ERROR_MARK node here. */
12798 if (TREE_CODE (parameter) == ERROR_MARK)
12801 type = TREE_TYPE (parameter);
12804 mode = TYPE_MODE (type);
12807 /* We assume that in the target function all parameters are
12808 named. This only has an impact on vector argument register
12809 usage none of which is call-saved. */
12810 if (pass_by_reference (&cum_v, mode, type, true))
12813 type = build_pointer_type (type);
12816 parm_rtx = s390_function_arg (cum, mode, type, true);
12818 s390_function_arg_advance (cum, mode, type, true);
12823 if (REG_P (parm_rtx))
12826 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12828 if (!call_used_regs[reg + REGNO (parm_rtx)])
12832 if (GET_CODE (parm_rtx) == PARALLEL)
12836 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12838 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12840 gcc_assert (REG_P (r));
12843 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12845 if (!call_used_regs[reg + REGNO (r)])
12854 /* Return true if the given call expression can be
12855 turned into a sibling call.
12856 DECL holds the declaration of the function to be called whereas
12857 EXP is the call expression itself. */
12860 s390_function_ok_for_sibcall (tree decl, tree exp)
12862 /* The TPF epilogue uses register 1. */
12863 if (TARGET_TPF_PROFILING)
12866 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12867 which would have to be restored before the sibcall. */
12868 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12871 /* Register 6 on s390 is available as an argument register but unfortunately
12872 "caller saved". This makes functions needing this register for arguments
12873 not suitable for sibcalls. */
12874 return !s390_call_saved_register_used (exp);
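/* Editor's illustration of the r6 restriction (hypothetical functions):

       extern void callee (long a, long b, long c, long d, long e);
       void caller (void) { callee (1, 2, 3, 4, 5); }

   The fifth argument is passed in %r6, which is call-saved, so the
   call in caller cannot be turned into a sibcall.  */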
12877 /* Return the fixed registers used for condition codes. */
12880 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12883 *p2 = INVALID_REGNUM;
12888 /* This function is used by the call expanders of the machine description.
12889 It emits the call insn itself together with the necessary operations
12890 to adjust the target address and returns the emitted insn.
12891 ADDR_LOCATION is the target address rtx
12892 TLS_CALL the location of the thread-local symbol
12893 RESULT_REG the register where the result of the call should be stored
12894 RETADDR_REG the register where the return address should be stored
12895 If this parameter is NULL_RTX the call is considered
12896 to be a sibling call. */
12899 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12902 bool plt_call = false;
12908 /* Direct function calls need special treatment. */
12909 if (GET_CODE (addr_location) == SYMBOL_REF)
12911 /* When calling a global routine in PIC mode, we must
12912 replace the symbol itself with the PLT stub. */
12913 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12915 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12917 addr_location = gen_rtx_UNSPEC (Pmode,
12918 gen_rtvec (1, addr_location),
12920 addr_location = gen_rtx_CONST (Pmode, addr_location);
12924 /* For -fpic code the PLT entries might use r12 which is
12925 call-saved. Therefore we cannot do a sibcall when
12926 calling directly using a symbol ref. When reaching
12927 this point we decided (in s390_function_ok_for_sibcall)
12928 to do a sibcall for a function pointer but one of the
12929 optimizers was able to get rid of the function pointer
12930 by propagating the symbol ref into the call. This
12931 optimization is illegal for S/390 so we turn the direct
12932 call into a indirect call again. */
12933 addr_location = force_reg (Pmode, addr_location);
12936 /* Unless we can use the bras(l) insn, force the
12937 routine address into a register. */
12938 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12941 addr_location = legitimize_pic_address (addr_location, 0);
12943 addr_location = force_reg (Pmode, addr_location);
12947 /* If it is already an indirect call or the code above moved the
12948 SYMBOL_REF to somewhere else make sure the address can be found in
12950 if (retaddr_reg == NULL_RTX
12951 && GET_CODE (addr_location) != SYMBOL_REF
12954 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12955 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12958 addr_location = gen_rtx_MEM (QImode, addr_location);
12959 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12961 if (result_reg != NULL_RTX)
12962 call = gen_rtx_SET (result_reg, call);
12964 if (retaddr_reg != NULL_RTX)
12966 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12968 if (tls_call != NULL_RTX)
12969 vec = gen_rtvec (3, call, clobber,
12970 gen_rtx_USE (VOIDmode, tls_call));
12972 vec = gen_rtvec (2, call, clobber);
12974 call = gen_rtx_PARALLEL (VOIDmode, vec);
12977 insn = emit_call_insn (call);
12979 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12980 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12982 /* s390_function_ok_for_sibcall should
12983 have denied sibcalls in this case. */
12984 gcc_assert (retaddr_reg != NULL_RTX);
12985 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12990 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12993 s390_conditional_register_usage (void)
12999 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13000 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13002 if (TARGET_CPU_ZARCH)
13004 fixed_regs[BASE_REGNUM] = 0;
13005 call_used_regs[BASE_REGNUM] = 0;
13006 fixed_regs[RETURN_REGNUM] = 0;
13007 call_used_regs[RETURN_REGNUM] = 0;
13011 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13012 call_used_regs[i] = call_really_used_regs[i] = 0;
13016 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13017 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13020 if (TARGET_SOFT_FLOAT)
13022 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13023 call_used_regs[i] = fixed_regs[i] = 1;
13026 /* Disable v16 - v31 for non-vector target. */
13029 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13030 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13034 /* Corresponding function to eh_return expander. */
13036 static GTY(()) rtx s390_tpf_eh_return_symbol;
13038 s390_emit_tpf_eh_return (rtx target)
13043 if (!s390_tpf_eh_return_symbol)
13044 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13046 reg = gen_rtx_REG (Pmode, 2);
13047 orig_ra = gen_rtx_REG (Pmode, 3);
13049 emit_move_insn (reg, target);
13050 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13051 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13052 gen_rtx_REG (Pmode, RETURN_REGNUM));
13053 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13054 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13056 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13059 /* Rework the prologue/epilogue to avoid saving/restoring
13060 registers unnecessarily. */
13063 s390_optimize_prologue (void)
13065 rtx_insn *insn, *new_insn, *next_insn;
13067 /* Do a final recompute of the frame-related data. */
13068 s390_optimize_register_info ();
13070 /* If all special registers are in fact used, there's nothing we
13071 can do, so no point in walking the insn list. */
13073 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13074 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13075 && (TARGET_CPU_ZARCH
13076 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13077 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13080 /* Search for prologue/epilogue insns and replace them. */
13082 for (insn = get_insns (); insn; insn = next_insn)
13084 int first, last, off;
13085 rtx set, base, offset;
13088 next_insn = NEXT_INSN (insn);
13090 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13093 pat = PATTERN (insn);
13095 /* Remove ldgr/lgdr instructions used for saving and restore
13096 GPRs if possible. */
13101 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13102 tmp_pat = XVECEXP (pat, 0, 0);
13104 if (GET_CODE (tmp_pat) == SET
13105 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13106 && REG_P (SET_SRC (tmp_pat))
13107 && REG_P (SET_DEST (tmp_pat)))
13109 int src_regno = REGNO (SET_SRC (tmp_pat));
13110 int dest_regno = REGNO (SET_DEST (tmp_pat));
13114 if (!((GENERAL_REGNO_P (src_regno)
13115 && FP_REGNO_P (dest_regno))
13116 || (FP_REGNO_P (src_regno)
13117 && GENERAL_REGNO_P (dest_regno))))
13120 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13121 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13123 /* GPR must be call-saved, FPR must be call-clobbered. */
13124 if (!call_really_used_regs[fpr_regno]
13125 || call_really_used_regs[gpr_regno])
13128 /* It must not happen that what we once saved in an FPR now
13129 needs a stack slot. */
13130 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13132 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13134 remove_insn (insn);
13140 if (GET_CODE (pat) == PARALLEL
13141 && store_multiple_operation (pat, VOIDmode))
13143 set = XVECEXP (pat, 0, 0);
13144 first = REGNO (SET_SRC (set));
13145 last = first + XVECLEN (pat, 0) - 1;
13146 offset = const0_rtx;
13147 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13148 off = INTVAL (offset);
13150 if (GET_CODE (base) != REG || off < 0)
13152 if (cfun_frame_layout.first_save_gpr != -1
13153 && (cfun_frame_layout.first_save_gpr < first
13154 || cfun_frame_layout.last_save_gpr > last))
13156 if (REGNO (base) != STACK_POINTER_REGNUM
13157 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13159 if (first > BASE_REGNUM || last < BASE_REGNUM)
13162 if (cfun_frame_layout.first_save_gpr != -1)
13164 rtx s_pat = save_gprs (base,
13165 off + (cfun_frame_layout.first_save_gpr
13166 - first) * UNITS_PER_LONG,
13167 cfun_frame_layout.first_save_gpr,
13168 cfun_frame_layout.last_save_gpr);
13169 new_insn = emit_insn_before (s_pat, insn);
13170 INSN_ADDRESSES_NEW (new_insn, -1);
13173 remove_insn (insn);
13177 if (cfun_frame_layout.first_save_gpr == -1
13178 && GET_CODE (pat) == SET
13179 && GENERAL_REG_P (SET_SRC (pat))
13180 && GET_CODE (SET_DEST (pat)) == MEM)
13183 first = REGNO (SET_SRC (set));
13184 offset = const0_rtx;
13185 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13186 off = INTVAL (offset);
13188 if (GET_CODE (base) != REG || off < 0)
13190 if (REGNO (base) != STACK_POINTER_REGNUM
13191 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13194 remove_insn (insn);
13198 if (GET_CODE (pat) == PARALLEL
13199 && load_multiple_operation (pat, VOIDmode))
13201 set = XVECEXP (pat, 0, 0);
13202 first = REGNO (SET_DEST (set));
13203 last = first + XVECLEN (pat, 0) - 1;
13204 offset = const0_rtx;
13205 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13206 off = INTVAL (offset);
13208 if (GET_CODE (base) != REG || off < 0)
13211 if (cfun_frame_layout.first_restore_gpr != -1
13212 && (cfun_frame_layout.first_restore_gpr < first
13213 || cfun_frame_layout.last_restore_gpr > last))
13215 if (REGNO (base) != STACK_POINTER_REGNUM
13216 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13218 if (first > BASE_REGNUM || last < BASE_REGNUM)
13221 if (cfun_frame_layout.first_restore_gpr != -1)
13223 rtx rpat = restore_gprs (base,
13224 off + (cfun_frame_layout.first_restore_gpr
13225 - first) * UNITS_PER_LONG,
13226 cfun_frame_layout.first_restore_gpr,
13227 cfun_frame_layout.last_restore_gpr);
13229 /* Remove REG_CFA_RESTOREs for registers that we no
13230 longer need to save. */
13231 REG_NOTES (rpat) = REG_NOTES (insn);
13232 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13233 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13234 && ((int) REGNO (XEXP (*ptr, 0))
13235 < cfun_frame_layout.first_restore_gpr))
13236 *ptr = XEXP (*ptr, 1);
13238 ptr = &XEXP (*ptr, 1);
13239 new_insn = emit_insn_before (rpat, insn);
13240 RTX_FRAME_RELATED_P (new_insn) = 1;
13241 INSN_ADDRESSES_NEW (new_insn, -1);
13244 remove_insn (insn);
13248 if (cfun_frame_layout.first_restore_gpr == -1
13249 && GET_CODE (pat) == SET
13250 && GENERAL_REG_P (SET_DEST (pat))
13251 && GET_CODE (SET_SRC (pat)) == MEM)
13254 first = REGNO (SET_DEST (set));
13255 offset = const0_rtx;
13256 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13257 off = INTVAL (offset);
13259 if (GET_CODE (base) != REG || off < 0)
13262 if (REGNO (base) != STACK_POINTER_REGNUM
13263 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13266 remove_insn (insn);
13272 /* On z10 and later the dynamic branch prediction must see the
13273 backward jump within a certain window. If not, it falls back to
13274 the static prediction. This function rearranges the loop backward
13275 branch in a way which makes the static prediction always correct.
13276 The function returns true if it added an instruction. */
13278 s390_fix_long_loop_prediction (rtx_insn *insn)
13280 rtx set = single_set (insn);
13281 rtx code_label, label_ref, new_label;
13282 rtx_insn *uncond_jump;
13283 rtx_insn *cur_insn;
13287 /* This will exclude branch on count and branch on index patterns
13288 since these are correctly statically predicted. */
13290 || SET_DEST (set) != pc_rtx
13291 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13294 /* Skip conditional returns. */
13295 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13296 && XEXP (SET_SRC (set), 2) == pc_rtx)
13299 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13300 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13302 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13304 code_label = XEXP (label_ref, 0);
13306 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13307 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13308 || (INSN_ADDRESSES (INSN_UID (insn))
13309 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13312 for (distance = 0, cur_insn = PREV_INSN (insn);
13313 distance < PREDICT_DISTANCE - 6;
13314 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13315 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13318 new_label = gen_label_rtx ();
13319 uncond_jump = emit_jump_insn_after (
13320 gen_rtx_SET (pc_rtx,
13321 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13323 emit_label_after (new_label, uncond_jump);
13325 tmp = XEXP (SET_SRC (set), 1);
13326 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13327 XEXP (SET_SRC (set), 2) = tmp;
13328 INSN_CODE (insn) = -1;
13330 XEXP (label_ref, 0) = new_label;
13331 JUMP_LABEL (insn) = new_label;
13332 JUMP_LABEL (uncond_jump) = code_label;
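/* Editor's sketch of the rewrite performed above:

       before:                     after:
         code_label:                 code_label:
           ...                         ...
           jl  code_label              jnl new_label   # condition inverted
                                       j   code_label  # always taken
                                     new_label:

   The backward branch becomes unconditional, so the static prediction
   cannot be wrong for it, and the new conditional branch is a short
   forward branch.  Mnemonics chosen only for illustration.  */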
13337 /* Returns 1 if INSN reads the value of REG for purposes not related
13338 to addressing of memory, and 0 otherwise. */
13340 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13342 return reg_referenced_p (reg, PATTERN (insn))
13343 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13346 /* Starting from INSN find_cond_jump looks downwards in the insn
13347 stream for a single jump insn which is the last user of the
13348 condition code set in INSN. */
13350 find_cond_jump (rtx_insn *insn)
13352 for (; insn; insn = NEXT_INSN (insn))
13356 if (LABEL_P (insn))
13359 if (!JUMP_P (insn))
13361 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13366 /* This will be triggered by a return. */
13367 if (GET_CODE (PATTERN (insn)) != SET)
13370 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13371 ite = SET_SRC (PATTERN (insn));
13373 if (GET_CODE (ite) != IF_THEN_ELSE)
13376 cc = XEXP (XEXP (ite, 0), 0);
13377 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13380 if (find_reg_note (insn, REG_DEAD, cc))
13388 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13389 the semantics does not change. If NULL_RTX is passed as COND the
13390 function tries to find the conditional jump starting with INSN. */
13392 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13396 if (cond == NULL_RTX)
13398 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13399 rtx set = jump ? single_set (jump) : NULL_RTX;
13401 if (set == NULL_RTX)
13404 cond = XEXP (SET_SRC (set), 0);
13409 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13412 /* On z10, instructions of the compare-and-branch family have the
13413 property to access the register occurring as second operand with
13414 its bits complemented. If such a compare is grouped with a second
13415 instruction that accesses the same register non-complemented, and
13416 if that register's value is delivered via a bypass, then the
13417 pipeline recycles, thereby causing significant performance decline.
13418 This function locates such situations and exchanges the two
13419 operands of the compare. The function returns true whenever it
13422 s390_z10_optimize_cmp (rtx_insn *insn)
13424 rtx_insn *prev_insn, *next_insn;
13425 bool insn_added_p = false;
13426 rtx cond, *op0, *op1;
13428 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13430 /* Handle compare and branch and branch on count
13432 rtx pattern = single_set (insn);
13435 || SET_DEST (pattern) != pc_rtx
13436 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13439 cond = XEXP (SET_SRC (pattern), 0);
13440 op0 = &XEXP (cond, 0);
13441 op1 = &XEXP (cond, 1);
13443 else if (GET_CODE (PATTERN (insn)) == SET)
13447 /* Handle normal compare instructions. */
13448 src = SET_SRC (PATTERN (insn));
13449 dest = SET_DEST (PATTERN (insn));
13452 || !CC_REGNO_P (REGNO (dest))
13453 || GET_CODE (src) != COMPARE)
13456 /* s390_swap_cmp will try to find the conditional
13457 jump when passing NULL_RTX as condition. */
13459 op0 = &XEXP (src, 0);
13460 op1 = &XEXP (src, 1);
13465 if (!REG_P (*op0) || !REG_P (*op1))
13468 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13471 /* Swap the COMPARE arguments and its mask if there is a
13472 conflicting access in the previous insn. */
13473 prev_insn = prev_active_insn (insn);
13474 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13475 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13476 s390_swap_cmp (cond, op0, op1, insn);
13478 /* Check if there is a conflict with the next insn. If there
13479 was no conflict with the previous insn, then swap the
13480 COMPARE arguments and its mask. If we already swapped
13481 the operands, or if swapping them would cause a conflict
13482 with the previous insn, issue a NOP after the COMPARE in
13483 order to separate the two instructions. */
13484 next_insn = next_active_insn (insn);
13485 if (next_insn != NULL_RTX && INSN_P (next_insn)
13486 && s390_non_addr_reg_read_p (*op1, next_insn))
13488 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13489 && s390_non_addr_reg_read_p (*op0, prev_insn))
13491 if (REGNO (*op1) == 0)
13492 emit_insn_after (gen_nop1 (), insn);
13494 emit_insn_after (gen_nop (), insn);
13495 insn_added_p = true;
13498 s390_swap_cmp (cond, op0, op1, insn);
13500 return insn_added_p;
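/* Editor's sketch of the z10 hazard handled above (register numbers
   invented for illustration):

       lr   %r3,%r9        # %r3 delivered via a bypass
       crj  %r2,%r3,...    # compare-and-branch reads %r3 complemented
       st   %r3,0(%r5)     # non-complemented read of %r3

   Swapping the compare operands moves %r3 out of the complemented
   second slot; when swapping is not possible, the nop emitted above
   separates the two readers instead.  */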
13503 /* Perform machine-dependent processing. */
13508 bool pool_overflow = false;
13509 int hw_before, hw_after;
13511 /* Make sure all splits have been performed; splits after
13512 machine_dependent_reorg might confuse insn length counts. */
13513 split_all_insns_noflow ();
13515 /* Install the main literal pool and the associated base
13516 register load insns.
13518 In addition, there are two problematic situations we need
13519 to correct:
13521 - the literal pool might be > 4096 bytes in size, so that
13522 some of its elements cannot be directly accessed
13524 - a branch target might be > 64K away from the branch, so that
13525 it is not possible to use a PC-relative instruction.
13527 To fix those, we split the single literal pool into multiple
13528 pool chunks, reloading the pool base register at various
13529 points throughout the function to ensure it always points to
13530 the pool chunk the following code expects, and / or replace
13531 PC-relative branches by absolute branches.
13533 However, the two problems are interdependent: splitting the
13534 literal pool can move a branch further away from its target,
13535 causing the 64K limit to overflow, and on the other hand,
13536 replacing a PC-relative branch by an absolute branch means
13537 we need to put the branch target address into the literal
13538 pool, possibly causing it to overflow.
13540 So, we loop trying to fix up both problems until we manage
13541 to satisfy both conditions at the same time. Note that the
13542 loop is guaranteed to terminate as every pass of the loop
13543 strictly decreases the total number of PC-relative branches
13544 in the function. (This is not completely true as there
13545 might be branch-over-pool insns introduced by chunkify_start.
13546 Those never need to be split however.) */
13550 struct constant_pool *pool = NULL;
13552 /* Collect the literal pool. */
13553 if (!pool_overflow)
13555 pool = s390_mainpool_start ();
13557 pool_overflow = true;
13560 /* If literal pool overflowed, start to chunkify it. */
13562 pool = s390_chunkify_start ();
13564 /* Split out-of-range branches. If this has created new
13565 literal pool entries, cancel current chunk list and
13566 recompute it. zSeries machines have large branch
13567 instructions, so we never need to split a branch. */
13568 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13571 s390_chunkify_cancel (pool);
13573 s390_mainpool_cancel (pool);
13578 /* If we made it up to here, both conditions are satisfied.
13579 Finish up literal pool related changes. */
13581 s390_chunkify_finish (pool);
13583 s390_mainpool_finish (pool);
13585 /* We're done splitting branches. */
13586 cfun->machine->split_branches_pending_p = false;
13590 /* Generate out-of-pool execute target insns. */
13591 if (TARGET_CPU_ZARCH)
13593 rtx_insn *insn, *target;
13596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13598 label = s390_execute_label (insn);
13602 gcc_assert (label != const0_rtx);
13604 target = emit_label (XEXP (label, 0));
13605 INSN_ADDRESSES_NEW (target, -1);
13607 target = emit_insn (s390_execute_target (insn));
13608 INSN_ADDRESSES_NEW (target, -1);
13612 /* Try to optimize prologue and epilogue further. */
13613 s390_optimize_prologue ();
13615 /* Walk over the insns and do some >=z10 specific changes. */
13616 if (s390_tune >= PROCESSOR_2097_Z10)
13619 bool insn_added_p = false;
13621 /* The insn lengths and addresses have to be up to date for the
13622 following manipulations. */
13623 shorten_branches (get_insns ());
13625 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13627 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13631 insn_added_p |= s390_fix_long_loop_prediction (insn);
13633 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13634 || GET_CODE (PATTERN (insn)) == SET)
13635 && s390_tune == PROCESSOR_2097_Z10)
13636 insn_added_p |= s390_z10_optimize_cmp (insn);
13639 /* Adjust branches if we added new instructions. */
13641 shorten_branches (get_insns ());
13644 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13649 /* Insert NOPs for hotpatching. */
13650 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13651     /* Emit NOPs
13652	 1. inside the area covered by debug information to allow setting
13653	    breakpoints at the NOPs,
13654	 2. before any insn which results in an asm instruction,
13655	 3. before in-function labels to avoid jumping to the NOPs, for
13656	    example as part of a loop,
13657	 4. before any barrier in case the function is completely empty
13658	    (__builtin_unreachable ()) and has neither internal labels nor
13659	    active insns.  */
13661 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13663 /* Output a series of NOPs before the first active insn. */
13664 while (insn && hw_after > 0)
13666 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13668 emit_insn_before (gen_nop_6_byte (), insn);
13671 else if (hw_after >= 2)
13673 emit_insn_before (gen_nop_4_byte (), insn);
13678 emit_insn_before (gen_nop_2_byte (), insn);
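/* Worked example (illustrative): for hw_after == 5 on a zarch CPU, the
   loop above first emits a 6-byte NOP (3 halfwords) and then a 4-byte
   NOP (2 halfwords), covering all five requested halfwords.  */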
13685 /* Return true if INSN is an fp load insn writing register REGNO.  */
13687 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13690 enum attr_type flag = s390_safe_attr_type (insn);
13692 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13695 set = single_set (insn);
13697 if (set == NULL_RTX)
13700 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13703 if (REGNO (SET_DEST (set)) != regno)
13709 /* This value describes the distance to be avoided between an
13710    arithmetic fp instruction and an fp load writing the same register.
13711    Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
13712    fine, but the exact value has to be avoided.  Otherwise the FP
13713 pipeline will throw an exception causing a major penalty. */
13714 #define Z10_EARLYLOAD_DISTANCE 7
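/* Illustration (a sketch; the mnemonics are only an example):

     adbr %f0,%f2       ; fp arithmetic writing f0
     ...                ; other insns
     ld   %f0,0(%r1)    ; fp load writing f0 again

   If the two end up exactly Z10_EARLYLOAD_DISTANCE insns apart, the
   penalty is triggered; one insn closer or further apart is fine.  */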
13716 /* Rearrange the ready list in order to avoid the situation described
13717 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13718 moved to the very end of the ready list. */
13720 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13722 unsigned int regno;
13723 int nready = *nready_p;
13728 enum attr_type flag;
13731 /* Skip DISTANCE - 1 active insns. */
13732 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13733 distance > 0 && insn != NULL_RTX;
13734 distance--, insn = prev_active_insn (insn))
13735 if (CALL_P (insn) || JUMP_P (insn))
13738 if (insn == NULL_RTX)
13741 set = single_set (insn);
13743 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13744 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13747 flag = s390_safe_attr_type (insn);
13749 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13752 regno = REGNO (SET_DEST (set));
13755 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13762 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
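/* E.g. (illustrative): with ready = {A, B, L, C} and the offending
   load L found at index 2, the rotation above yields {L, A, B, C}.
   The scheduler issues from the end of the array, so L is now
   considered last.  */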
13767 /* The s390_sched_state variable tracks the state of the current or
13768 the last instruction group.
13770 0,1,2 number of instructions scheduled in the current group
13771 3 the last group is complete - normal insns
13772 4 the last group was a cracked/expanded insn */
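/* For example, issuing three normal insns in a row moves the state
   0 -> 1 -> 2 -> 3 (group complete), while issuing a cracked or
   expanded insn from any state moves it to 4.  */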
13774 static int s390_sched_state;
13776 #define S390_SCHED_STATE_NORMAL 3
13777 #define S390_SCHED_STATE_CRACKED 4
13779 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
13780 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
13781 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
13782 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
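/* A mask may combine several bits; e.g. (S390_SCHED_ATTR_MASK_CRACKED
   | S390_SCHED_ATTR_MASK_ENDGROUP) describes an insn that the decoder
   splits into several internal ops and that also ends the current
   dispatch group.  */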
13784 static unsigned int
13785 s390_get_sched_attrmask (rtx_insn *insn)
13787 unsigned int mask = 0;
13791 case PROCESSOR_2827_ZEC12:
13792 if (get_attr_zEC12_cracked (insn))
13793 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13794 if (get_attr_zEC12_expanded (insn))
13795 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13796 if (get_attr_zEC12_endgroup (insn))
13797 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13798 if (get_attr_zEC12_groupalone (insn))
13799 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13801 case PROCESSOR_2964_Z13:
13802 if (get_attr_z13_cracked (insn))
13803 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13804 if (get_attr_z13_expanded (insn))
13805 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13806 if (get_attr_z13_endgroup (insn))
13807 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13808 if (get_attr_z13_groupalone (insn))
13809 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13812 gcc_unreachable ();
13817 static unsigned int
13818 s390_get_unit_mask (rtx_insn *insn, int *units)
13820 unsigned int mask = 0;
13824 case PROCESSOR_2964_Z13:
13826 if (get_attr_z13_unit_lsu (insn))
13828 if (get_attr_z13_unit_fxu (insn))
13830 if (get_attr_z13_unit_vfu (insn))
13834 gcc_unreachable ();
13839 /* Return the scheduling score for INSN. The higher the score the
13840 better. The score is calculated from the OOO scheduling attributes
13841 of INSN and the scheduling state s390_sched_state. */
13843 s390_sched_score (rtx_insn *insn)
13845 unsigned int mask = s390_get_sched_attrmask (insn);
13848 switch (s390_sched_state)
13851       /* Try to put insns into the first slot which would otherwise
13852	 break a group.  */
13853 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13854 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13856 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13859       /* Prefer not cracked insns while trying to put together a
13860	 group.  */
13861 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13862 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13863 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13865 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
13869       /* Prefer not cracked insns while trying to put together a
13870	 group.  */
13871 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13872 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13873 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13875 /* Prefer endgroup insns in the last slot. */
13876 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
13879 case S390_SCHED_STATE_NORMAL:
13880 /* Prefer not cracked insns if the last was not cracked. */
13881 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13882 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
13884 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13887 case S390_SCHED_STATE_CRACKED:
13888 /* Try to keep cracked insns together to prevent them from
13889 interrupting groups. */
13890 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13891 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13896 if (s390_tune == PROCESSOR_2964_Z13)
13899 unsigned unit_mask, m = 1;
13901 unit_mask = s390_get_unit_mask (insn, &units);
13902 gcc_assert (units <= MAX_SCHED_UNITS);
13904 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
13905 ago the last insn of this unit type got scheduled. This is
13906	 supposed to help provide a proper instruction mix to the
13907	 hardware.  */
13908 for (i = 0; i < units; i++, m <<= 1)
13910 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
13911 MAX_SCHED_MIX_DISTANCE);
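/* Worked example (assuming, for illustration, a MAX_SCHED_MIX_SCORE of
   8 and a MAX_SCHED_MIX_DISTANCE of 100; the real values are defined
   elsewhere in this file): a unit last used 50 insns ago contributes
   50 * 8 / 100 = 4 points, while a unit idle for 100 or more insns
   contributes the full 8.  */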
13916 /* This function is called via hook TARGET_SCHED_REORDER before
13917 issuing one insn from list READY which contains *NREADYP entries.
13918 For target z10 it reorders load instructions to avoid early load
13919    conflicts in the floating point pipeline.  */
13921 s390_sched_reorder (FILE *file, int verbose,
13922 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13924 if (s390_tune == PROCESSOR_2097_Z10
13925 && reload_completed
13927 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13929 if (s390_tune >= PROCESSOR_2827_ZEC12
13930 && reload_completed
13934 int last_index = *nreadyp - 1;
13935 int max_index = -1;
13936 int max_score = -1;
13939 /* Just move the insn with the highest score to the top (the
13940 end) of the list. A full sort is not needed since a conflict
13941 in the hazard recognition cannot happen. So the top insn in
13942 the ready list will always be taken. */
13943 for (i = last_index; i >= 0; i--)
13947 if (recog_memoized (ready[i]) < 0)
13950 score = s390_sched_score (ready[i]);
13951 if (score > max_score)
13958 if (max_index != -1)
13960 if (max_index != last_index)
13962 tmp = ready[max_index];
13963 ready[max_index] = ready[last_index];
13964 ready[last_index] = tmp;
13968 ";;\t\tBACKEND: move insn %d to the top of list\n",
13969 INSN_UID (ready[last_index]));
13971 else if (verbose > 5)
13973 ";;\t\tBACKEND: best insn %d already on top\n",
13974 INSN_UID (ready[last_index]));
13979 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13982 for (i = last_index; i >= 0; i--)
13984 unsigned int sched_mask;
13985 rtx_insn *insn = ready[i];
13987 if (recog_memoized (insn) < 0)
13990 sched_mask = s390_get_sched_attrmask (insn);
13991 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
13993 s390_sched_score (insn));
13994 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
13995 ((M) & sched_mask) ? #ATTR : "");
13996 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
13997 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
13998 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
13999 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14000 #undef PRINT_SCHED_ATTR
14001 if (s390_tune == PROCESSOR_2964_Z13)
14003 unsigned int unit_mask, m = 1;
14006 unit_mask = s390_get_unit_mask (insn, &units);
14007 fprintf (file, "(units:");
14008 for (j = 0; j < units; j++, m <<= 1)
14010 fprintf (file, " u%d", j);
14011 fprintf (file, ")");
14013 fprintf (file, "\n");
14018 return s390_issue_rate ();
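/* E.g. (illustrative): with ready-list UIDs {12, 17, 23} and insn 17
   scoring highest, the swap above yields {12, 23, 17}; since the
   scheduler always takes the top (last) element, insn 17 is issued
   next.  */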
14022 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14023 the scheduler has issued INSN. It stores the last issued insn into
14024 last_scheduled_insn in order to make it available for
14025 s390_sched_reorder. */
14027 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14029 last_scheduled_insn = insn;
14031 if (s390_tune >= PROCESSOR_2827_ZEC12
14032 && reload_completed
14033 && recog_memoized (insn) >= 0)
14035 unsigned int mask = s390_get_sched_attrmask (insn);
14037 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14038 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14039 s390_sched_state = S390_SCHED_STATE_CRACKED;
14040 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14041 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14042 s390_sched_state = S390_SCHED_STATE_NORMAL;
14045 /* Only normal insns are left (mask == 0). */
14046 switch (s390_sched_state)
14051 case S390_SCHED_STATE_NORMAL:
14052 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14053 s390_sched_state = 1;
14055 s390_sched_state++;
14058 case S390_SCHED_STATE_CRACKED:
14059 s390_sched_state = S390_SCHED_STATE_NORMAL;
14064 if (s390_tune == PROCESSOR_2964_Z13)
14067 unsigned unit_mask, m = 1;
14069 unit_mask = s390_get_unit_mask (insn, &units);
14070 gcc_assert (units <= MAX_SCHED_UNITS);
14072 for (i = 0; i < units; i++, m <<= 1)
14074 last_scheduled_unit_distance[i] = 0;
14075 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14076 last_scheduled_unit_distance[i]++;
14081 unsigned int sched_mask;
14083 sched_mask = s390_get_sched_attrmask (insn);
14085 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14086 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14087 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14088 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14089 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14090 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14091 #undef PRINT_SCHED_ATTR
14093 if (s390_tune == PROCESSOR_2964_Z13)
14095 unsigned int unit_mask, m = 1;
14098 unit_mask = s390_get_unit_mask (insn, &units);
14099 fprintf (file, "(units:");
14100 for (j = 0; j < units; j++, m <<= 1)
14102 fprintf (file, " %d", j);
14103 fprintf (file, ")");
14105 fprintf (file, " sched state: %d\n", s390_sched_state);
14107 if (s390_tune == PROCESSOR_2964_Z13)
14111 s390_get_unit_mask (insn, &units);
14113 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14114 for (j = 0; j < units; j++)
14115 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14116 fprintf (file, "\n");
14121 if (GET_CODE (PATTERN (insn)) != USE
14122 && GET_CODE (PATTERN (insn)) != CLOBBER)
14129 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14130 int verbose ATTRIBUTE_UNUSED,
14131 int max_ready ATTRIBUTE_UNUSED)
14133 last_scheduled_insn = NULL;
14134 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14135 s390_sched_state = 0;
14138 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14139    a new number of times struct loop *loop should be unrolled if tuned for cpus with
14140 a built-in stride prefetcher.
14141 The loop is analyzed for memory accesses by calling check_dpu for
14142 each rtx of the loop. Depending on the loop_depth and the amount of
14143    memory accesses, a new number <= nunroll is returned to improve the
14144 behavior of the hardware prefetch unit. */
14146 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14151 unsigned mem_count = 0;
14153 if (s390_tune < PROCESSOR_2097_Z10)
14156 /* Count the number of memory references within the loop body. */
14157 bbs = get_loop_body (loop);
14158 subrtx_iterator::array_type array;
14159 for (i = 0; i < loop->num_nodes; i++)
14160 FOR_BB_INSNS (bbs[i], insn)
14161 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14162 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14167   /* Prevent division by zero; nunroll does not need adjusting in this case.  */
14168 if (mem_count == 0)
14171   switch (loop_depth (loop))
14174 return MIN (nunroll, 28 / mem_count);
14176 return MIN (nunroll, 22 / mem_count);
14178 return MIN (nunroll, 16 / mem_count);
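/* Worked example: in the first case above, a loop containing four
   memory accesses yields MIN (nunroll, 28 / 4) = MIN (nunroll, 7),
   so a requested unroll factor of 8 is reduced to 7.  */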
14182 /* Restore the current options.  This is a hook function and also called
14183    internally.  */
14186 s390_function_specific_restore (struct gcc_options *opts,
14187 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14189 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14193 s390_option_override_internal (bool main_args_p,
14194 struct gcc_options *opts,
14195 const struct gcc_options *opts_set)
14197 const char *prefix;
14198 const char *suffix;
14200 /* Set up prefix/suffix so the error messages refer to either the command
14201 line argument, or the attribute(target). */
14209 prefix = "option(\"";
14214 /* Architecture mode defaults according to ABI. */
14215 if (!(opts_set->x_target_flags & MASK_ZARCH))
14218 opts->x_target_flags |= MASK_ZARCH;
14220 opts->x_target_flags &= ~MASK_ZARCH;
14223 /* Set the march default in case it hasn't been specified on cmdline. */
14224 if (!opts_set->x_s390_arch)
14225 opts->x_s390_arch = PROCESSOR_2064_Z900;
14226 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14227 || opts->x_s390_arch == PROCESSOR_9672_G6)
14228 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14229 "in future releases; use at least %sarch=z900%s",
14230 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14231 suffix, prefix, suffix);
14233 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14235 /* Determine processor to tune for. */
14236 if (!opts_set->x_s390_tune)
14237 opts->x_s390_tune = opts->x_s390_arch;
14238 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14239 || opts->x_s390_tune == PROCESSOR_9672_G6)
14240 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14241 "in future releases; use at least %stune=z900%s",
14242 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14243 suffix, prefix, suffix);
14245 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14247 /* Sanity checks. */
14248 if (opts->x_s390_arch == PROCESSOR_NATIVE
14249 || opts->x_s390_tune == PROCESSOR_NATIVE)
14250 gcc_unreachable ();
14251 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14252 error ("z/Architecture mode not supported on %s",
14253 processor_table[(int)opts->x_s390_arch].name);
14254 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14255 error ("64-bit ABI not supported in ESA/390 mode");
14257 /* Enable hardware transactions if available and not explicitly
14258 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14259 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14261 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14262 opts->x_target_flags |= MASK_OPT_HTM;
14264 opts->x_target_flags &= ~MASK_OPT_HTM;
14267 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14269 if (TARGET_OPT_VX_P (opts->x_target_flags))
14271 if (!TARGET_CPU_VX_P (opts))
14272 error ("hardware vector support not available on %s",
14273 processor_table[(int)opts->x_s390_arch].name);
14274 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14275 error ("hardware vector support not available with -msoft-float");
14280 if (TARGET_CPU_VX_P (opts))
14281 /* Enable vector support if available and not explicitly disabled
14282 by user. E.g. with -m31 -march=z13 -mzarch */
14283 opts->x_target_flags |= MASK_OPT_VX;
14285 opts->x_target_flags &= ~MASK_OPT_VX;
14288 /* Use hardware DFP if available and not explicitly disabled by
14289 user. E.g. with -m31 -march=z10 -mzarch */
14290 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14292 if (TARGET_DFP_P (opts))
14293 opts->x_target_flags |= MASK_HARD_DFP;
14295 opts->x_target_flags &= ~MASK_HARD_DFP;
14298 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14300 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14302 if (!TARGET_CPU_DFP_P (opts))
14303 error ("hardware decimal floating point instructions"
14304 " not available on %s",
14305 processor_table[(int)opts->x_s390_arch].name);
14306 if (!TARGET_ZARCH_P (opts->x_target_flags))
14307 error ("hardware decimal floating point instructions"
14308 " not available in ESA/390 mode");
14311 opts->x_target_flags &= ~MASK_HARD_DFP;
14314 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14315 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14317 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14318 && TARGET_HARD_DFP_P (opts->x_target_flags))
14319 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14321 opts->x_target_flags &= ~MASK_HARD_DFP;
14324 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14325 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14326 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14327     error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14328	   "in combination");
14330 if (opts->x_s390_stack_size)
14332 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14333 error ("stack size must be greater than the stack guard value");
14334 else if (opts->x_s390_stack_size > 1 << 16)
14335 error ("stack size must not be greater than 64k");
14337 else if (opts->x_s390_stack_guard)
14338 error ("-mstack-guard implies use of -mstack-size");
14340 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14341 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14342 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14345 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14347 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14348 opts->x_param_values,
14349 opts_set->x_param_values);
14350 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14351 opts->x_param_values,
14352 opts_set->x_param_values);
14353 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14354 opts->x_param_values,
14355 opts_set->x_param_values);
14356 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14357 opts->x_param_values,
14358 opts_set->x_param_values);
14361 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14362 opts->x_param_values,
14363 opts_set->x_param_values);
14364   /* Values for loop prefetching.  */
14365 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14366 opts->x_param_values,
14367 opts_set->x_param_values);
14368 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14369 opts->x_param_values,
14370 opts_set->x_param_values);
14371   /* s390 has more than 2 cache levels and the sizes are much larger.
14372      Since we are always running virtualized, assume that we only get a
14373      small part of the caches above L1.  */
14374 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14375 opts->x_param_values,
14376 opts_set->x_param_values);
14377 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14378 opts->x_param_values,
14379 opts_set->x_param_values);
14380 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14381 opts->x_param_values,
14382 opts_set->x_param_values);
14384 /* Use the alternative scheduling-pressure algorithm by default. */
14385 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14386 opts->x_param_values,
14387 opts_set->x_param_values);
14389 /* Call target specific restore function to do post-init work. At the moment,
14390 this just sets opts->x_s390_cost_pointer. */
14391 s390_function_specific_restore (opts, NULL);
14395 s390_option_override (void)
14398 cl_deferred_option *opt;
14399 vec<cl_deferred_option> *v =
14400 (vec<cl_deferred_option> *) s390_deferred_options;
14403 FOR_EACH_VEC_ELT (*v, i, opt)
14405 switch (opt->opt_index)
14407 case OPT_mhotpatch_:
14414 strncpy (s, opt->arg, 256);
14416 t = strchr (s, ',');
14421 val1 = integral_argument (s);
14422 val2 = integral_argument (t);
14429 if (val1 == -1 || val2 == -1)
14431 	      /* Argument is not a plain number.  */
14432 error ("arguments to %qs should be non-negative integers",
14436 else if (val1 > s390_hotpatch_hw_max
14437 || val2 > s390_hotpatch_hw_max)
14439 error ("argument to %qs is too large (max. %d)",
14440 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14443 s390_hotpatch_hw_before_label = val1;
14444 s390_hotpatch_hw_after_label = val2;
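	  /* E.g. -mhotpatch=1,2 reserves one halfword of NOPs before and
	     two halfwords after the function label.  */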
14448 gcc_unreachable ();
14452 /* Set up function hooks. */
14453 init_machine_status = s390_init_machine_status;
14455 s390_option_override_internal (true, &global_options, &global_options_set);
14457   /* Save the initial options in case the user does function specific
14458      options.  */
14459 target_option_default_node = build_target_option_node (&global_options);
14460 target_option_current_node = target_option_default_node;
14462 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14463 requires the arch flags to be evaluated already. Since prefetching
14464 is beneficial on s390, we enable it if available. */
14465 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14466 flag_prefetch_loop_arrays = 1;
14470 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14471 debuggers do not yet support DWARF 3/4. */
14472 if (!global_options_set.x_dwarf_strict)
14474 if (!global_options_set.x_dwarf_version)
14478 /* Register a target-specific optimization-and-lowering pass
14479 to run immediately before prologue and epilogue generation.
14481 Registering the pass must be done at start up. It's
14482 convenient to do it here. */
14483 opt_pass *new_pass = new pass_s390_early_mach (g);
14484 struct register_pass_info insert_pass_s390_early_mach =
14486 new_pass, /* pass */
14487 "pro_and_epilogue", /* reference_pass_name */
14488 1, /* ref_pass_instance_number */
14489 PASS_POS_INSERT_BEFORE /* po_op */
14491 register_pass (&insert_pass_s390_early_mach);
14494 #if S390_USE_TARGET_ATTRIBUTE
14495 /* Inner function to process the attribute((target(...))), take an argument and
14496    set the current options from the argument.  If we have a list, recursively
14497    go over the arguments.  */
14500 s390_valid_target_attribute_inner_p (tree args,
14501 struct gcc_options *opts,
14502 struct gcc_options *new_opts_set,
14508 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14509 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14510 static const struct
14512 const char *string;
14516 int only_as_pragma;
14519 S390_ATTRIB ("arch=", OPT_march_, 1),
14520 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14521 /* uinteger options */
14522 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14523 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14524 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14525 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14527 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14528 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14529 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14530 S390_ATTRIB ("htm", OPT_mhtm, 0),
14531 S390_ATTRIB ("vx", OPT_mvx, 0),
14532 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14533 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14534 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14535 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14536 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14537 /* boolean options */
14538 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14543 /* If this is a list, recurse to get the options. */
14544 if (TREE_CODE (args) == TREE_LIST)
14547 int num_pragma_values;
14550 /* Note: attribs.c:decl_attributes prepends the values from
14551 current_target_pragma to the list of target attributes. To determine
14552 whether we're looking at a value of the attribute or the pragma we
14553 assume that the first [list_length (current_target_pragma)] values in
14554 the list are the values from the pragma. */
14555 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14556 ? list_length (current_target_pragma) : 0;
14557 for (i = 0; args; args = TREE_CHAIN (args), i++)
14561 is_pragma = (force_pragma || i < num_pragma_values);
14562 if (TREE_VALUE (args)
14563 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14564 opts, new_opts_set,
14573 else if (TREE_CODE (args) != STRING_CST)
14575 error ("attribute %<target%> argument not a string");
14579 /* Handle multiple arguments separated by commas. */
14580 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14582 while (next_optstr && *next_optstr != '\0')
14584 char *p = next_optstr;
14586 char *comma = strchr (next_optstr, ',');
14587 size_t len, opt_len;
14593 enum cl_var_type var_type;
14599 len = comma - next_optstr;
14600 next_optstr = comma + 1;
14605 next_optstr = NULL;
14608 /* Recognize no-xxx. */
14609 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14618 /* Find the option. */
14621 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14623 opt_len = attrs[i].len;
14624 if (ch == attrs[i].string[0]
14625 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14626 && memcmp (p, attrs[i].string, opt_len) == 0)
14628 opt = attrs[i].opt;
14629 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14631 mask = cl_options[opt].var_value;
14632 var_type = cl_options[opt].var_type;
14638 /* Process the option. */
14641 error ("attribute(target(\"%s\")) is unknown", orig_p);
14644 else if (attrs[i].only_as_pragma && !force_pragma)
14646 /* Value is not allowed for the target attribute. */
14647 	  error ("value %qs is not supported by attribute %<target%>",
14652 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14654 if (var_type == CLVC_BIT_CLEAR)
14655 opt_set_p = !opt_set_p;
14658 opts->x_target_flags |= mask;
14660 opts->x_target_flags &= ~mask;
14661 new_opts_set->x_target_flags |= mask;
14664 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14668 if (cl_options[opt].cl_uinteger)
14670 /* Unsigned integer argument. Code based on the function
14671 decode_cmdline_option () in opts-common.c. */
14672 value = integral_argument (p + opt_len);
14675 value = (opt_set_p) ? 1 : 0;
14679 struct cl_decoded_option decoded;
14681 /* Value range check; only implemented for numeric and boolean
14682 options at the moment. */
14683 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14684 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14685 set_option (opts, new_opts_set, opt, value,
14686 p + opt_len, DK_UNSPECIFIED, input_location,
14691 error ("attribute(target(\"%s\")) is unknown", orig_p);
14696 else if (cl_options[opt].var_type == CLVC_ENUM)
14701 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14703 set_option (opts, new_opts_set, opt, value,
14704 p + opt_len, DK_UNSPECIFIED, input_location,
14708 error ("attribute(target(\"%s\")) is unknown", orig_p);
14714 gcc_unreachable ();
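/* For example, __attribute__ ((target ("arch=z13,no-vx"))) is split at
   the comma: "arch=z13" matches the "arch=" table entry and is handled
   like -march=z13, while "no-vx" has its "no-" prefix stripped and, in
   effect, clears the corresponding target flag as -mno-vx would.  */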
14719 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14722 s390_valid_target_attribute_tree (tree args,
14723 struct gcc_options *opts,
14724 const struct gcc_options *opts_set,
14727 tree t = NULL_TREE;
14728 struct gcc_options new_opts_set;
14730 memset (&new_opts_set, 0, sizeof (new_opts_set));
14732 /* Process each of the options on the chain. */
14733 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14735 return error_mark_node;
14737 /* If some option was set (even if it has not changed), rerun
14738 s390_option_override_internal, and then save the options away. */
14739 if (new_opts_set.x_target_flags
14740 || new_opts_set.x_s390_arch
14741 || new_opts_set.x_s390_tune
14742 || new_opts_set.x_s390_stack_guard
14743 || new_opts_set.x_s390_stack_size
14744 || new_opts_set.x_s390_branch_cost
14745 || new_opts_set.x_s390_warn_framesize
14746 || new_opts_set.x_s390_warn_dynamicstack_p)
14748 const unsigned char *src = (const unsigned char *)opts_set;
14749 unsigned char *dest = (unsigned char *)&new_opts_set;
14752 /* Merge the original option flags into the new ones. */
14753 for (i = 0; i < sizeof(*opts_set); i++)
14756 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
14757 s390_option_override_internal (false, opts, &new_opts_set);
14758       /* Save the current options unless we are validating options for
14759	 #pragma.  */
14760 t = build_target_option_node (opts);
14765 /* Hook to validate attribute((target("string"))). */
14768 s390_valid_target_attribute_p (tree fndecl,
14769 tree ARG_UNUSED (name),
14771 int ARG_UNUSED (flags))
14773 struct gcc_options func_options;
14774 tree new_target, new_optimize;
14777 /* attribute((target("default"))) does nothing, beyond
14778 affecting multi-versioning. */
14779 if (TREE_VALUE (args)
14780 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
14781 && TREE_CHAIN (args) == NULL_TREE
14782 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
14785 tree old_optimize = build_optimization_node (&global_options);
14787 /* Get the optimization options of the current function. */
14788 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14790 if (!func_optimize)
14791 func_optimize = old_optimize;
14793 /* Init func_options. */
14794 memset (&func_options, 0, sizeof (func_options));
14795 init_options_struct (&func_options, NULL);
14796 lang_hooks.init_options_struct (&func_options);
14798 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
14800   /* Initialize func_options to the default before its target options can
14801      be set.  */
14802 cl_target_option_restore (&func_options,
14803 TREE_TARGET_OPTION (target_option_default_node));
14805 new_target = s390_valid_target_attribute_tree (args, &func_options,
14806 &global_options_set,
14808 current_target_pragma));
14809 new_optimize = build_optimization_node (&func_options);
14810 if (new_target == error_mark_node)
14812 else if (fndecl && new_target)
14814 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
14815 if (old_optimize != new_optimize)
14816 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
14821 /* Restore targets globals from NEW_TREE and invalidate the
14822    s390_previous_fndecl cache.  */
14825 s390_activate_target_options (tree new_tree)
14827 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
14828 if (TREE_TARGET_GLOBALS (new_tree))
14829 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
14830 else if (new_tree == target_option_default_node)
14831 restore_target_globals (&default_target_globals);
14833 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
14834 s390_previous_fndecl = NULL_TREE;
14837 /* Establish appropriate back-end context for processing the function
14838 FNDECL. The argument might be NULL to indicate processing at top
14839 level, outside of any function scope. */
14841 s390_set_current_function (tree fndecl)
14843 /* Only change the context if the function changes. This hook is called
14844 several times in the course of compiling a function, and we don't want to
14845 slow things down too much or call target_reinit when it isn't safe. */
14846 if (fndecl == s390_previous_fndecl)
14850 if (s390_previous_fndecl == NULL_TREE)
14851 old_tree = target_option_current_node;
14852 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
14853 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
14855 old_tree = target_option_default_node;
14857 if (fndecl == NULL_TREE)
14859 if (old_tree != target_option_current_node)
14860 s390_activate_target_options (target_option_current_node);
14864 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14865 if (new_tree == NULL_TREE)
14866 new_tree = target_option_default_node;
14868 if (old_tree != new_tree)
14869 s390_activate_target_options (new_tree);
14870 s390_previous_fndecl = fndecl;
14874 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14877 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14878 unsigned int align ATTRIBUTE_UNUSED,
14879 enum by_pieces_operation op ATTRIBUTE_UNUSED,
14880 bool speed_p ATTRIBUTE_UNUSED)
14882 return (size == 1 || size == 2
14883 || size == 4 || (TARGET_ZARCH && size == 8));
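/* I.e. block moves and clears of 1, 2, 4 and (in zarch mode) 8 bytes
   are expanded inline piece by piece; other sizes fall back to the
   target's block-operation patterns or a library call.  */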
14886 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
14889 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
14891 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
14892 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
14893 tree call_efpc = build_call_expr (efpc, 0);
14894 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
14896 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
14897 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
14898 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
14899 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
14900 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
14901 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
14903 /* Generates the equivalent of feholdexcept (&fenv_var)
14905 fenv_var = __builtin_s390_efpc ();
14906 __builtin_s390_sfpc (fenv_var & mask) */
14907 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
14909 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
14910 build_int_cst (unsigned_type_node,
14911 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
14912 FPC_EXCEPTION_MASK)));
14913 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
14914 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
14916 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
14918 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
14919 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
14920 build_int_cst (unsigned_type_node,
14921 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
14922 *clear = build_call_expr (sfpc, 1, new_fpc);
14924 /* Generates the equivalent of feupdateenv (fenv_var)
14926 old_fpc = __builtin_s390_efpc ();
14927 __builtin_s390_sfpc (fenv_var);
14928 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
14930 old_fpc = create_tmp_var_raw (unsigned_type_node);
14931 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
14932 old_fpc, call_efpc);
14934 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
14936 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
14937 build_int_cst (unsigned_type_node,
14939 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
14940 build_int_cst (unsigned_type_node,
14942 tree atomic_feraiseexcept
14943 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
14944 raise_old_except = build_call_expr (atomic_feraiseexcept,
14945 1, raise_old_except);
14947 *update = build2 (COMPOUND_EXPR, void_type_node,
14948 build2 (COMPOUND_EXPR, void_type_node,
14949 store_old_fpc, set_new_fpc),
14952 #undef FPC_EXCEPTION_MASK
14953 #undef FPC_FLAGS_MASK
14954 #undef FPC_DXC_MASK
14955 #undef FPC_EXCEPTION_MASK_SHIFT
14956 #undef FPC_FLAGS_SHIFT
14957 #undef FPC_DXC_SHIFT
14960 /* Return the vector mode to be used for inner mode MODE when doing
14961    vectorization.  */
14962 static machine_mode
14963 s390_preferred_simd_mode (machine_mode mode)
14983 /* Our hardware does not require vectors to be strictly aligned. */
14985 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
14986 const_tree type ATTRIBUTE_UNUSED,
14987 int misalignment ATTRIBUTE_UNUSED,
14988 bool is_packed ATTRIBUTE_UNUSED)
14993 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14997 /* The vector ABI requires vector types to be aligned on an 8 byte
14998 boundary (our stack alignment). However, we allow this to be
14999    overridden by the user, although this definitely breaks the ABI.  */
15000 static HOST_WIDE_INT
15001 s390_vector_alignment (const_tree type)
15003 if (!TARGET_VX_ABI)
15004 return default_vector_alignment (type);
15006 if (TYPE_USER_ALIGN (type))
15007 return TYPE_ALIGN (type);
15009 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
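/* Example: a 16-byte vector has a TYPE_SIZE of 128 bits, so it gets
   MIN (64, 128) = 64 bits, i.e. the 8-byte ABI alignment; a 4-byte
   vector keeps its natural 32-bit alignment.  */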
15012 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15013 /* Implement TARGET_ASM_FILE_START. */
15015 s390_asm_file_start (void)
15017 default_file_start ();
15018 s390_asm_output_machine_for_arch (asm_out_file);
15022 /* Implement TARGET_ASM_FILE_END. */
15024 s390_asm_file_end (void)
15026 #ifdef HAVE_AS_GNU_ATTRIBUTE
15027 varpool_node *vnode;
15028 cgraph_node *cnode;
15030 FOR_EACH_VARIABLE (vnode)
15031 if (TREE_PUBLIC (vnode->decl))
15032 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15034 FOR_EACH_FUNCTION (cnode)
15035 if (TREE_PUBLIC (cnode->decl))
15036 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15039 if (s390_vector_abi != 0)
15040 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15043 file_end_indicate_exec_stack ();
15045 if (flag_split_stack)
15046 file_end_indicate_split_stack ();
15049 /* Return true if TYPE is a vector bool type. */
15051 s390_vector_bool_type_p (const_tree type)
15053 return TYPE_VECTOR_OPAQUE (type);
15056 /* Return the diagnostic message string if the binary operation OP is
15057 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15059 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15061 bool bool1_p, bool2_p;
15065 machine_mode mode1, mode2;
15067 if (!TARGET_ZVECTOR)
15070 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15073 bool1_p = s390_vector_bool_type_p (type1);
15074 bool2_p = s390_vector_bool_type_p (type2);
15076   /* Mixing signed and unsigned types is forbidden for all
15077      vector operations.  */
15078 if (!bool1_p && !bool2_p
15079 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15080     return N_("types differ in signedness");
15082 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15083 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15084 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15085 || op == ROUND_DIV_EXPR);
15086 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15087 || op == EQ_EXPR || op == NE_EXPR);
15089 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15090 return N_("binary operator does not support two vector bool operands");
15092 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15093 return N_("binary operator does not support vector bool operand");
15095 mode1 = TYPE_MODE (type1);
15096 mode2 = TYPE_MODE (type2);
15098 if (bool1_p != bool2_p && plusminus_p
15099 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15100 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15101 return N_("binary operator does not support mixing vector "
15102 "bool with floating point vector operands");
15107 /* Initialize GCC target structure. */
15109 #undef TARGET_ASM_ALIGNED_HI_OP
15110 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15111 #undef TARGET_ASM_ALIGNED_DI_OP
15112 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15113 #undef TARGET_ASM_INTEGER
15114 #define TARGET_ASM_INTEGER s390_assemble_integer
15116 #undef TARGET_ASM_OPEN_PAREN
15117 #define TARGET_ASM_OPEN_PAREN ""
15119 #undef TARGET_ASM_CLOSE_PAREN
15120 #define TARGET_ASM_CLOSE_PAREN ""
15122 #undef TARGET_OPTION_OVERRIDE
15123 #define TARGET_OPTION_OVERRIDE s390_option_override
15125 #undef TARGET_ENCODE_SECTION_INFO
15126 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15128 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15129 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15132 #undef TARGET_HAVE_TLS
15133 #define TARGET_HAVE_TLS true
15135 #undef TARGET_CANNOT_FORCE_CONST_MEM
15136 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15138 #undef TARGET_DELEGITIMIZE_ADDRESS
15139 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15141 #undef TARGET_LEGITIMIZE_ADDRESS
15142 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15144 #undef TARGET_RETURN_IN_MEMORY
15145 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15147 #undef TARGET_INIT_BUILTINS
15148 #define TARGET_INIT_BUILTINS s390_init_builtins
15149 #undef TARGET_EXPAND_BUILTIN
15150 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15151 #undef TARGET_BUILTIN_DECL
15152 #define TARGET_BUILTIN_DECL s390_builtin_decl
15154 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15155 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15157 #undef TARGET_ASM_OUTPUT_MI_THUNK
15158 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15159 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15160 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15162 #undef TARGET_SCHED_ADJUST_PRIORITY
15163 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15164 #undef TARGET_SCHED_ISSUE_RATE
15165 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15166 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15167 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15169 #undef TARGET_SCHED_VARIABLE_ISSUE
15170 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15171 #undef TARGET_SCHED_REORDER
15172 #define TARGET_SCHED_REORDER s390_sched_reorder
15173 #undef TARGET_SCHED_INIT
15174 #define TARGET_SCHED_INIT s390_sched_init
15176 #undef TARGET_CANNOT_COPY_INSN_P
15177 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15178 #undef TARGET_RTX_COSTS
15179 #define TARGET_RTX_COSTS s390_rtx_costs
15180 #undef TARGET_ADDRESS_COST
15181 #define TARGET_ADDRESS_COST s390_address_cost
15182 #undef TARGET_REGISTER_MOVE_COST
15183 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15184 #undef TARGET_MEMORY_MOVE_COST
15185 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15187 #undef TARGET_MACHINE_DEPENDENT_REORG
15188 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15190 #undef TARGET_VALID_POINTER_MODE
15191 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15193 #undef TARGET_BUILD_BUILTIN_VA_LIST
15194 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15195 #undef TARGET_EXPAND_BUILTIN_VA_START
15196 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15197 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15198 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15200 #undef TARGET_PROMOTE_FUNCTION_MODE
15201 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15202 #undef TARGET_PASS_BY_REFERENCE
15203 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15205 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15206 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15207 #undef TARGET_FUNCTION_ARG
15208 #define TARGET_FUNCTION_ARG s390_function_arg
15209 #undef TARGET_FUNCTION_ARG_ADVANCE
15210 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15211 #undef TARGET_FUNCTION_VALUE
15212 #define TARGET_FUNCTION_VALUE s390_function_value
15213 #undef TARGET_LIBCALL_VALUE
15214 #define TARGET_LIBCALL_VALUE s390_libcall_value
15215 #undef TARGET_STRICT_ARGUMENT_NAMING
15216 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15218 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15219 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15221 #undef TARGET_FIXED_CONDITION_CODE_REGS
15222 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15224 #undef TARGET_CC_MODES_COMPATIBLE
15225 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15227 #undef TARGET_INVALID_WITHIN_DOLOOP
15228 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15231 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15232 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15235 #undef TARGET_DWARF_FRAME_REG_MODE
15236 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15238 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15239 #undef TARGET_MANGLE_TYPE
15240 #define TARGET_MANGLE_TYPE s390_mangle_type
15243 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15244 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15246 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15247 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15249 #undef TARGET_PREFERRED_RELOAD_CLASS
15250 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15252 #undef TARGET_SECONDARY_RELOAD
15253 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15255 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15256 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15258 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15259 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15261 #undef TARGET_LEGITIMATE_ADDRESS_P
15262 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15264 #undef TARGET_LEGITIMATE_CONSTANT_P
15265 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15267 #undef TARGET_LRA_P
15268 #define TARGET_LRA_P s390_lra_p
15270 #undef TARGET_CAN_ELIMINATE
15271 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15273 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15274 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15276 #undef TARGET_LOOP_UNROLL_ADJUST
15277 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15279 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15280 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15281 #undef TARGET_TRAMPOLINE_INIT
15282 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15284 #undef TARGET_UNWIND_WORD_MODE
15285 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15287 #undef TARGET_CANONICALIZE_COMPARISON
15288 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15290 #undef TARGET_HARD_REGNO_SCRATCH_OK
15291 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15293 #undef TARGET_ATTRIBUTE_TABLE
15294 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15296 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15297 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15299 #undef TARGET_SET_UP_BY_PROLOGUE
15300 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15302 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15303 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15305 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15306 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15307 s390_use_by_pieces_infrastructure_p
15309 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15310 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15312 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15313 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15315 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15316 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15318 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15319 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15321 #undef TARGET_VECTOR_ALIGNMENT
15322 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15324 #undef TARGET_INVALID_BINARY_OP
15325 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15327 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15328 #undef TARGET_ASM_FILE_START
15329 #define TARGET_ASM_FILE_START s390_asm_file_start
15332 #undef TARGET_ASM_FILE_END
15333 #define TARGET_ASM_FILE_END s390_asm_file_end
15335 #if S390_USE_TARGET_ATTRIBUTE
15336 #undef TARGET_SET_CURRENT_FUNCTION
15337 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15339 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15340 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15343 #undef TARGET_OPTION_RESTORE
15344 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15346 struct gcc_target targetm = TARGET_INITIALIZER;
15348 #include "gt-s390.h"