1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
35 #include "stringpool.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
46 #include "fold-const.h"
48 #include "stor-layout.h"
50 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-ssa.h"
64 #include "gimple-walk.h"
67 #include "tm-constrs.h"
68 #include "tree-vectorizer.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
76 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
79 #include "gstab.h" /* for N_SLINE */
81 #include "case-cfn-macros.h"
83 #include "tree-ssa-propagate.h"
85 #include "tree-ssanames.h"
87 /* This file should be included last. */
88 #include "target-def.h"
90 #ifndef TARGET_NO_PROTOTYPE
91 #define TARGET_NO_PROTOTYPE 0
94 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
95 systems will also set long double to be IEEE 128-bit. AIX and Darwin
96 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
97 those systems will not pick up this default. This needs to be after all
98 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are defined.  */
100 #ifndef TARGET_IEEEQUAD_DEFAULT
101 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
102 #define TARGET_IEEEQUAD_DEFAULT 1
104 #define TARGET_IEEEQUAD_DEFAULT 0
108 static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
110 /* Structure used to define the rs6000 stack frame layout: per-function offsets and sizes of the register save areas.  */
111 typedef struct rs6000_stack {
112 int reload_completed; /* stack info won't change from here on */
113 int first_gp_reg_save; /* first callee saved GP register used */
114 int first_fp_reg_save; /* first callee saved FP register used */
115 int first_altivec_reg_save; /* first callee saved AltiVec register used */
116 int lr_save_p; /* true if the link reg needs to be saved */
117 int cr_save_p; /* true if the CR reg needs to be saved */
118 unsigned int vrsave_mask; /* mask of vec registers to save */
119 int push_p; /* true if we need to allocate stack space */
120 int calls_p; /* true if the function makes any calls */
121 int world_save_p; /* true if we're saving *everything*:
122 r13-r31, cr, f14-f31, vrsave, v20-v31 */
123 enum rs6000_abi abi; /* which ABI to use */
124 int gp_save_offset; /* offset to save GP regs from initial SP */
125 int fp_save_offset; /* offset to save FP regs from initial SP */
126 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
127 int lr_save_offset; /* offset to save LR from initial SP */
128 int cr_save_offset; /* offset to save CR from initial SP */
129 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
130 int varargs_save_offset; /* offset to save the varargs registers */
131 int ehrd_offset; /* offset to EH return data */
132 int ehcr_offset; /* offset to EH CR field data */
133 int reg_size; /* register size (4 or 8) */
134 HOST_WIDE_INT vars_size; /* variable save area size */
135 int parm_size; /* outgoing parameter size */
136 int save_size; /* save area size */
137 int fixed_size; /* fixed size of stack frame */
138 int gp_size; /* size of saved GP registers */
139 int fp_size; /* size of saved FP registers */
140 int altivec_size; /* size of saved AltiVec registers */
141 int cr_size; /* size to hold CR if not in fixed area */
142 int vrsave_size; /* size to hold VRSAVE */
143 int altivec_padding_size; /* size of altivec alignment padding */
144 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
148 /* A C structure for machine-specific, per-function data.
149 This is added to the cfun structure. */
150 typedef struct GTY(()) machine_function
152 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
153 int ra_needs_full_frame;
154 /* Flags if __builtin_return_address (0) was used. */
156 /* Cache lr_save_p after expansion of builtin_eh_return. */
158 /* Whether we need to save the TOC to the reserved stack location in the
159 function prologue. */
160 bool save_toc_in_prologue;
161 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
162 varargs save area. */
163 HOST_WIDE_INT varargs_save_offset;
164 /* Alternative internal arg pointer for -fsplit-stack. */
165 rtx split_stack_arg_pointer;
166 bool split_stack_argp_used;
167 /* Flag if r2 setup is needed with ELFv2 ABI. */
168 bool r2_setup_needed;
169 /* The number of components we use for separate shrink-wrapping. */
171 /* The components already handled by separate shrink-wrapping, which should
172 not be considered by the prologue and epilogue. */
173 bool gpr_is_wrapped_separately[32];
174 bool fpr_is_wrapped_separately[32];
175 bool lr_is_wrapped_separately;
176 bool toc_is_wrapped_separately;
179 /* Support targetm.vectorize.builtin_mask_for_load. */
180 static GTY(()) tree altivec_builtin_mask_for_load;
182 /* Set to nonzero once AIX common-mode calls have been defined. */
183 static GTY(()) int common_mode_defined;
185 /* Label number of label created for -mrelocatable, to call to so we can
186 get the address of the GOT section */
187 static int rs6000_pic_labelno;
190 /* Counter for labels which are to be placed in .fixup. */
191 int fixuplabelno = 0;
194 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
197 /* Specify the machine mode that pointers have. After generation of rtl, the
198 compiler makes no further distinction between pointers and any other objects
199 of this machine mode. */
200 scalar_int_mode rs6000_pmode;
203 /* Note whether IEEE 128-bit floating point was passed or returned, either as
204 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
205 floating point. We changed the default C++ mangling for these types and we
206 may want to generate a weak alias of the old mangling (U10__float128) to the
207 new mangling (u9__ieee128). */
208 static bool rs6000_passes_ieee128;
211 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
212 name used in current releases (i.e. u9__ieee128). */
213 static bool ieee128_mangling_gcc_8_1;
215 /* Width in bits of a pointer. */
216 unsigned rs6000_pointer_size;
218 #ifdef HAVE_AS_GNU_ATTRIBUTE
219 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
220 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
222 /* Flag whether floating point values have been passed/returned.
223 Note that this doesn't say whether fprs are used, since the
224 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
225 should be set for soft-float values passed in gprs and ieee128
226 values passed in vsx registers. */
227 static bool rs6000_passes_float;
228 static bool rs6000_passes_long_double;
229 /* Flag whether vector values have been passed/returned. */
230 static bool rs6000_passes_vector;
231 /* Flag whether small (<= 8 byte) structures have been returned. */
232 static bool rs6000_returns_struct;
235 /* Value is TRUE if register/mode pair is acceptable. */
236 static bool rs6000_hard_regno_mode_ok_p
237 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
239 /* Maximum number of registers needed for a given register class and mode. */
240 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
242 /* How many registers are needed for a given register and mode. */
243 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
245 /* Map register number to register class. */
246 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
248 static int dbg_cost_ctrl;
250 /* Built in types. */
251 tree rs6000_builtin_types[RS6000_BTI_MAX];
252 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
254 /* Flag to say the TOC is initialized */
255 int toc_initialized, need_toc_init;
256 char toc_label_name[10];
258 /* Cached value of rs6000_variable_issue. This is cached in
259 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
260 static short cached_can_issue_more;
262 static GTY(()) section *read_only_data_section;
263 static GTY(()) section *private_data_section;
264 static GTY(()) section *tls_data_section;
265 static GTY(()) section *tls_private_data_section;
266 static GTY(()) section *read_only_private_data_section;
267 static GTY(()) section *sdata2_section;
268 static GTY(()) section *toc_section;
270 struct builtin_description
272 const HOST_WIDE_INT mask;
273 const enum insn_code icode;
274 const char *const name;
275 const enum rs6000_builtins code;
278 /* Describe the vector unit used for modes. */
279 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
280 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
282 /* Register classes for various constraints that are based on the target
284 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
286 /* Describe the alignment of a vector. */
287 int rs6000_vector_align[NUM_MACHINE_MODES];
289 /* Map selected modes to types for builtins. */
290 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
292 /* What modes to automatically generate reciprocal divide estimate (fre) and
293 reciprocal sqrt (frsqrte) for. */
294 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
296 /* Masks to determine which reciprocal estimate instructions to generate
298 enum rs6000_recip_mask {
299 RECIP_SF_DIV = 0x001, /* Use divide estimate */
300 RECIP_DF_DIV = 0x002,
301 RECIP_V4SF_DIV = 0x004,
302 RECIP_V2DF_DIV = 0x008,
304 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
305 RECIP_DF_RSQRT = 0x020,
306 RECIP_V4SF_RSQRT = 0x040,
307 RECIP_V2DF_RSQRT = 0x080,
309 /* Various combination of flags for -mrecip=xxx. */
311 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
312 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
313 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
315 RECIP_HIGH_PRECISION = RECIP_ALL,
317 /* On low precision machines like the power5, don't enable double precision
318 reciprocal square root estimate, since it isn't accurate enough. */
319 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
322 /* -mrecip options. */
325 const char *string; /* option name */
326 unsigned int mask; /* mask bits to set */
327 } recip_options[] = {
328 { "all", RECIP_ALL },
329 { "none", RECIP_NONE },
330 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
332 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
333 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
334 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
335 | RECIP_V2DF_RSQRT) },
336 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
337 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
340 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
346 { "power9", PPC_PLATFORM_POWER9 },
347 { "power8", PPC_PLATFORM_POWER8 },
348 { "power7", PPC_PLATFORM_POWER7 },
349 { "power6x", PPC_PLATFORM_POWER6X },
350 { "power6", PPC_PLATFORM_POWER6 },
351 { "power5+", PPC_PLATFORM_POWER5_PLUS },
352 { "power5", PPC_PLATFORM_POWER5 },
353 { "ppc970", PPC_PLATFORM_PPC970 },
354 { "power4", PPC_PLATFORM_POWER4 },
355 { "ppca2", PPC_PLATFORM_PPCA2 },
356 { "ppc476", PPC_PLATFORM_PPC476 },
357 { "ppc464", PPC_PLATFORM_PPC464 },
358 { "ppc440", PPC_PLATFORM_PPC440 },
359 { "ppc405", PPC_PLATFORM_PPC405 },
360 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
363 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
369 } cpu_supports_info[] = {
370 /* AT_HWCAP masks. */
371 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
372 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
373 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
374 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
375 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
376 { "booke", PPC_FEATURE_BOOKE, 0 },
377 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
378 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
379 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
380 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
381 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
382 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
383 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
384 { "notb", PPC_FEATURE_NO_TB, 0 },
385 { "pa6t", PPC_FEATURE_PA6T, 0 },
386 { "power4", PPC_FEATURE_POWER4, 0 },
387 { "power5", PPC_FEATURE_POWER5, 0 },
388 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
389 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
390 { "ppc32", PPC_FEATURE_32, 0 },
391 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
392 { "ppc64", PPC_FEATURE_64, 0 },
393 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
394 { "smt", PPC_FEATURE_SMT, 0 },
395 { "spe", PPC_FEATURE_HAS_SPE, 0 },
396 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
397 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
398 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
400 /* AT_HWCAP2 masks. */
401 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
402 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
403 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
404 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
405 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
406 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
407 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
408 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
409 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
410 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
411 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
412 { "darn", PPC_FEATURE2_DARN, 1 },
413 { "scv", PPC_FEATURE2_SCV, 1 }
416 /* On PowerPC, we have a limited number of target clones that we care about
417 which means we can use an array to hold the options, rather than having more
418 elaborate data structures to identify each possible variation. Order the
419 clones from the default to the highest ISA. */
421 CLONE_DEFAULT = 0, /* default clone. */
422 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
423 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
424 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
425 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
429 /* Map compiler ISA bits into HWCAP names. */
431 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
432 const char *name; /* name to use in __builtin_cpu_supports. */
435 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
436 { 0, "" }, /* Default options. */
437 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
438 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
439 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
440 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
444 /* Newer LIBCs explicitly export this symbol to declare that they provide
445 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
446 reference to this symbol whenever we expand a CPU builtin, so that
447 we never link against an old LIBC. */
448 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
450 /* True if we have expanded a CPU builtin. */
453 /* Pointer to function (in rs6000-c.c) that can define or undefine target
454 macros that have changed. Languages that don't support the preprocessor
455 don't link in rs6000-c.c, so we can't call it directly. */
456 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
458 /* Simplify register classes into simpler classifications. We assume
459 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
460 check for standard register classes (gpr/floating/altivec/vsx) and
461 floating/vector classes (float/altivec/vsx). */
463 enum rs6000_reg_type {
474 /* Map register class to register type. */
475 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
477 /* First/last register type for the 'normal' register types (i.e. general
478 purpose, floating point, altivec, and VSX registers). */
479 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
481 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
484 /* Register classes we care about in secondary reload or go if legitimate
485 address. We only need to worry about GPR, FPR, and Altivec registers here,
486 along an ANY field that is the OR of the 3 register classes. */
488 enum rs6000_reload_reg_type {
489 RELOAD_REG_GPR, /* General purpose registers. */
490 RELOAD_REG_FPR, /* Traditional floating point regs. */
491 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
492 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
496 /* For setting up register classes, loop through the 3 register classes mapping
497 into real registers, and skip the ANY class, which is just an OR of the
499 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
500 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
502 /* Map reload register type to a register in the register class. */
503 struct reload_reg_map_type {
504 const char *name; /* Register class name. */
505 int reg; /* Register in the register class. */
508 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
509 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
510 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
511 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
512 { "Any", -1 }, /* RELOAD_REG_ANY. */
515 /* Mask bits for each register class, indexed per mode. Historically the
516 compiler has been more restrictive about which types can do PRE_MODIFY instead of
517 PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
518 typedef unsigned char addr_mask_type;
520 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.  */
521 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
522 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
523 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
524 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
525 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
526 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
527 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
529 /* Register type masks based on the type, of valid addressing modes. */
530 struct rs6000_reg_addr {
531 enum insn_code reload_load; /* INSN to reload for loading. */
532 enum insn_code reload_store; /* INSN to reload for storing. */
533 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
534 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
535 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
536 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
537 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
540 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
542 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
544 mode_supports_pre_incdec_p (machine_mode mode)
546 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
550 /* Helper function to say whether a mode supports PRE_MODIFY. */
552 mode_supports_pre_modify_p (machine_mode mode)
554 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
558 /* Return true if we have D-form addressing in altivec registers. */
560 mode_supports_vmx_dform (machine_mode mode)
562 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
565 /* Return true if we have D-form addressing in VSX registers. This addressing
566 is more limited than normal d-form addressing in that the offset must be
567 aligned on a 16-byte boundary. */
569 mode_supports_dq_form (machine_mode mode)
571 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
575 /* Given that there exists at least one variable that is set (produced)
576 by OUT_INSN and read (consumed) by IN_INSN, return true iff
577 IN_INSN represents one or more memory store operations and none of
578 the variables set by OUT_INSN is used by IN_INSN as the address of a
579 store operation. If either IN_INSN or OUT_INSN does not represent
580 a "single" RTL SET expression (as loosely defined by the
581 implementation of the single_set function) or a PARALLEL with only
582 SETs, CLOBBERs, and USEs inside, this function returns false.
584 This rs6000-specific version of store_data_bypass_p checks for
585 certain conditions that result in assertion failures (and internal
586 compiler errors) in the generic store_data_bypass_p function and
587 returns false rather than calling store_data_bypass_p if one of the
588 problematic conditions is detected. */
591 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
598 in_set = single_set (in_insn);
601 if (MEM_P (SET_DEST (in_set)))
603 out_set = single_set (out_insn);
606 out_pat = PATTERN (out_insn);
607 if (GET_CODE (out_pat) == PARALLEL)
609 for (i = 0; i < XVECLEN (out_pat, 0); i++)
611 out_exp = XVECEXP (out_pat, 0, i);
612 if ((GET_CODE (out_exp) == CLOBBER)
613 || (GET_CODE (out_exp) == USE))
615 else if (GET_CODE (out_exp) != SET)
624 in_pat = PATTERN (in_insn);
625 if (GET_CODE (in_pat) != PARALLEL)
628 for (i = 0; i < XVECLEN (in_pat, 0); i++)
630 in_exp = XVECEXP (in_pat, 0, i);
631 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
633 else if (GET_CODE (in_exp) != SET)
636 if (MEM_P (SET_DEST (in_exp)))
638 out_set = single_set (out_insn);
641 out_pat = PATTERN (out_insn);
642 if (GET_CODE (out_pat) != PARALLEL)
644 for (j = 0; j < XVECLEN (out_pat, 0); j++)
646 out_exp = XVECEXP (out_pat, 0, j);
647 if ((GET_CODE (out_exp) == CLOBBER)
648 || (GET_CODE (out_exp) == USE))
650 else if (GET_CODE (out_exp) != SET)
657 return store_data_bypass_p (out_insn, in_insn);
661 /* Processor costs (relative to an add) */
663 const struct processor_costs *rs6000_cost;
665 /* Instruction size costs on 32bit processors. */
667 struct processor_costs size32_cost = {
668 COSTS_N_INSNS (1), /* mulsi */
669 COSTS_N_INSNS (1), /* mulsi_const */
670 COSTS_N_INSNS (1), /* mulsi_const9 */
671 COSTS_N_INSNS (1), /* muldi */
672 COSTS_N_INSNS (1), /* divsi */
673 COSTS_N_INSNS (1), /* divdi */
674 COSTS_N_INSNS (1), /* fp */
675 COSTS_N_INSNS (1), /* dmul */
676 COSTS_N_INSNS (1), /* sdiv */
677 COSTS_N_INSNS (1), /* ddiv */
678 32, /* cache line size */
682 0, /* SF->DF convert */
685 /* Instruction size costs on 64bit processors. */
687 struct processor_costs size64_cost = {
688 COSTS_N_INSNS (1), /* mulsi */
689 COSTS_N_INSNS (1), /* mulsi_const */
690 COSTS_N_INSNS (1), /* mulsi_const9 */
691 COSTS_N_INSNS (1), /* muldi */
692 COSTS_N_INSNS (1), /* divsi */
693 COSTS_N_INSNS (1), /* divdi */
694 COSTS_N_INSNS (1), /* fp */
695 COSTS_N_INSNS (1), /* dmul */
696 COSTS_N_INSNS (1), /* sdiv */
697 COSTS_N_INSNS (1), /* ddiv */
698 128, /* cache line size */
702 0, /* SF->DF convert */
705 /* Instruction costs on RS64A processors. */
707 struct processor_costs rs64a_cost = {
708 COSTS_N_INSNS (20), /* mulsi */
709 COSTS_N_INSNS (12), /* mulsi_const */
710 COSTS_N_INSNS (8), /* mulsi_const9 */
711 COSTS_N_INSNS (34), /* muldi */
712 COSTS_N_INSNS (65), /* divsi */
713 COSTS_N_INSNS (67), /* divdi */
714 COSTS_N_INSNS (4), /* fp */
715 COSTS_N_INSNS (4), /* dmul */
716 COSTS_N_INSNS (31), /* sdiv */
717 COSTS_N_INSNS (31), /* ddiv */
718 128, /* cache line size */
722 0, /* SF->DF convert */
725 /* Instruction costs on MPCCORE processors. */
727 struct processor_costs mpccore_cost = {
728 COSTS_N_INSNS (2), /* mulsi */
729 COSTS_N_INSNS (2), /* mulsi_const */
730 COSTS_N_INSNS (2), /* mulsi_const9 */
731 COSTS_N_INSNS (2), /* muldi */
732 COSTS_N_INSNS (6), /* divsi */
733 COSTS_N_INSNS (6), /* divdi */
734 COSTS_N_INSNS (4), /* fp */
735 COSTS_N_INSNS (5), /* dmul */
736 COSTS_N_INSNS (10), /* sdiv */
737 COSTS_N_INSNS (17), /* ddiv */
738 32, /* cache line size */
742 0, /* SF->DF convert */
745 /* Instruction costs on PPC403 processors. */
747 struct processor_costs ppc403_cost = {
748 COSTS_N_INSNS (4), /* mulsi */
749 COSTS_N_INSNS (4), /* mulsi_const */
750 COSTS_N_INSNS (4), /* mulsi_const9 */
751 COSTS_N_INSNS (4), /* muldi */
752 COSTS_N_INSNS (33), /* divsi */
753 COSTS_N_INSNS (33), /* divdi */
754 COSTS_N_INSNS (11), /* fp */
755 COSTS_N_INSNS (11), /* dmul */
756 COSTS_N_INSNS (11), /* sdiv */
757 COSTS_N_INSNS (11), /* ddiv */
758 32, /* cache line size */
762 0, /* SF->DF convert */
765 /* Instruction costs on PPC405 processors. */
767 struct processor_costs ppc405_cost = {
768 COSTS_N_INSNS (5), /* mulsi */
769 COSTS_N_INSNS (4), /* mulsi_const */
770 COSTS_N_INSNS (3), /* mulsi_const9 */
771 COSTS_N_INSNS (5), /* muldi */
772 COSTS_N_INSNS (35), /* divsi */
773 COSTS_N_INSNS (35), /* divdi */
774 COSTS_N_INSNS (11), /* fp */
775 COSTS_N_INSNS (11), /* dmul */
776 COSTS_N_INSNS (11), /* sdiv */
777 COSTS_N_INSNS (11), /* ddiv */
778 32, /* cache line size */
782 0, /* SF->DF convert */
785 /* Instruction costs on PPC440 processors. */
787 struct processor_costs ppc440_cost = {
788 COSTS_N_INSNS (3), /* mulsi */
789 COSTS_N_INSNS (2), /* mulsi_const */
790 COSTS_N_INSNS (2), /* mulsi_const9 */
791 COSTS_N_INSNS (3), /* muldi */
792 COSTS_N_INSNS (34), /* divsi */
793 COSTS_N_INSNS (34), /* divdi */
794 COSTS_N_INSNS (5), /* fp */
795 COSTS_N_INSNS (5), /* dmul */
796 COSTS_N_INSNS (19), /* sdiv */
797 COSTS_N_INSNS (33), /* ddiv */
798 32, /* cache line size */
802 0, /* SF->DF convert */
805 /* Instruction costs on PPC476 processors. */
807 struct processor_costs ppc476_cost = {
808 COSTS_N_INSNS (4), /* mulsi */
809 COSTS_N_INSNS (4), /* mulsi_const */
810 COSTS_N_INSNS (4), /* mulsi_const9 */
811 COSTS_N_INSNS (4), /* muldi */
812 COSTS_N_INSNS (11), /* divsi */
813 COSTS_N_INSNS (11), /* divdi */
814 COSTS_N_INSNS (6), /* fp */
815 COSTS_N_INSNS (6), /* dmul */
816 COSTS_N_INSNS (19), /* sdiv */
817 COSTS_N_INSNS (33), /* ddiv */
818 32, /* l1 cache line size */
822 0, /* SF->DF convert */
825 /* Instruction costs on PPC601 processors. */
827 struct processor_costs ppc601_cost = {
828 COSTS_N_INSNS (5), /* mulsi */
829 COSTS_N_INSNS (5), /* mulsi_const */
830 COSTS_N_INSNS (5), /* mulsi_const9 */
831 COSTS_N_INSNS (5), /* muldi */
832 COSTS_N_INSNS (36), /* divsi */
833 COSTS_N_INSNS (36), /* divdi */
834 COSTS_N_INSNS (4), /* fp */
835 COSTS_N_INSNS (5), /* dmul */
836 COSTS_N_INSNS (17), /* sdiv */
837 COSTS_N_INSNS (31), /* ddiv */
838 32, /* cache line size */
842 0, /* SF->DF convert */
845 /* Instruction costs on PPC603 processors. */
847 struct processor_costs ppc603_cost = {
848 COSTS_N_INSNS (5), /* mulsi */
849 COSTS_N_INSNS (3), /* mulsi_const */
850 COSTS_N_INSNS (2), /* mulsi_const9 */
851 COSTS_N_INSNS (5), /* muldi */
852 COSTS_N_INSNS (37), /* divsi */
853 COSTS_N_INSNS (37), /* divdi */
854 COSTS_N_INSNS (3), /* fp */
855 COSTS_N_INSNS (4), /* dmul */
856 COSTS_N_INSNS (18), /* sdiv */
857 COSTS_N_INSNS (33), /* ddiv */
858 32, /* cache line size */
862 0, /* SF->DF convert */
865 /* Instruction costs on PPC604 processors. */
867 struct processor_costs ppc604_cost = {
868 COSTS_N_INSNS (4), /* mulsi */
869 COSTS_N_INSNS (4), /* mulsi_const */
870 COSTS_N_INSNS (4), /* mulsi_const9 */
871 COSTS_N_INSNS (4), /* muldi */
872 COSTS_N_INSNS (20), /* divsi */
873 COSTS_N_INSNS (20), /* divdi */
874 COSTS_N_INSNS (3), /* fp */
875 COSTS_N_INSNS (3), /* dmul */
876 COSTS_N_INSNS (18), /* sdiv */
877 COSTS_N_INSNS (32), /* ddiv */
878 32, /* cache line size */
882 0, /* SF->DF convert */
885 /* Instruction costs on PPC604e processors. */
887 struct processor_costs ppc604e_cost = {
888 COSTS_N_INSNS (2), /* mulsi */
889 COSTS_N_INSNS (2), /* mulsi_const */
890 COSTS_N_INSNS (2), /* mulsi_const9 */
891 COSTS_N_INSNS (2), /* muldi */
892 COSTS_N_INSNS (20), /* divsi */
893 COSTS_N_INSNS (20), /* divdi */
894 COSTS_N_INSNS (3), /* fp */
895 COSTS_N_INSNS (3), /* dmul */
896 COSTS_N_INSNS (18), /* sdiv */
897 COSTS_N_INSNS (32), /* ddiv */
898 32, /* cache line size */
902 0, /* SF->DF convert */
905 /* Instruction costs on PPC620 processors. */
907 struct processor_costs ppc620_cost = {
908 COSTS_N_INSNS (5), /* mulsi */
909 COSTS_N_INSNS (4), /* mulsi_const */
910 COSTS_N_INSNS (3), /* mulsi_const9 */
911 COSTS_N_INSNS (7), /* muldi */
912 COSTS_N_INSNS (21), /* divsi */
913 COSTS_N_INSNS (37), /* divdi */
914 COSTS_N_INSNS (3), /* fp */
915 COSTS_N_INSNS (3), /* dmul */
916 COSTS_N_INSNS (18), /* sdiv */
917 COSTS_N_INSNS (32), /* ddiv */
918 128, /* cache line size */
922 0, /* SF->DF convert */
925 /* Instruction costs on PPC630 processors. */
927 struct processor_costs ppc630_cost = {
928 COSTS_N_INSNS (5), /* mulsi */
929 COSTS_N_INSNS (4), /* mulsi_const */
930 COSTS_N_INSNS (3), /* mulsi_const9 */
931 COSTS_N_INSNS (7), /* muldi */
932 COSTS_N_INSNS (21), /* divsi */
933 COSTS_N_INSNS (37), /* divdi */
934 COSTS_N_INSNS (3), /* fp */
935 COSTS_N_INSNS (3), /* dmul */
936 COSTS_N_INSNS (17), /* sdiv */
937 COSTS_N_INSNS (21), /* ddiv */
938 128, /* cache line size */
942 0, /* SF->DF convert */
945 /* Instruction costs on Cell processor. */
946 /* COSTS_N_INSNS (1) ~ one add. */
948 struct processor_costs ppccell_cost = {
949 COSTS_N_INSNS (9/2)+2, /* mulsi */
950 COSTS_N_INSNS (6/2), /* mulsi_const */
951 COSTS_N_INSNS (6/2), /* mulsi_const9 */
952 COSTS_N_INSNS (15/2)+2, /* muldi */
953 COSTS_N_INSNS (38/2), /* divsi */
954 COSTS_N_INSNS (70/2), /* divdi */
955 COSTS_N_INSNS (10/2), /* fp */
956 COSTS_N_INSNS (10/2), /* dmul */
957 COSTS_N_INSNS (74/2), /* sdiv */
958 COSTS_N_INSNS (74/2), /* ddiv */
959 128, /* cache line size */
963 0, /* SF->DF convert */
966 /* Instruction costs on PPC750 and PPC7400 processors. */
968 struct processor_costs ppc750_cost = {
969 COSTS_N_INSNS (5), /* mulsi */
970 COSTS_N_INSNS (3), /* mulsi_const */
971 COSTS_N_INSNS (2), /* mulsi_const9 */
972 COSTS_N_INSNS (5), /* muldi */
973 COSTS_N_INSNS (17), /* divsi */
974 COSTS_N_INSNS (17), /* divdi */
975 COSTS_N_INSNS (3), /* fp */
976 COSTS_N_INSNS (3), /* dmul */
977 COSTS_N_INSNS (17), /* sdiv */
978 COSTS_N_INSNS (31), /* ddiv */
979 32, /* cache line size */
983 0, /* SF->DF convert */
986 /* Instruction costs on PPC7450 processors. */
988 struct processor_costs ppc7450_cost = {
989 COSTS_N_INSNS (4), /* mulsi */
990 COSTS_N_INSNS (3), /* mulsi_const */
991 COSTS_N_INSNS (3), /* mulsi_const9 */
992 COSTS_N_INSNS (4), /* muldi */
993 COSTS_N_INSNS (23), /* divsi */
994 COSTS_N_INSNS (23), /* divdi */
995 COSTS_N_INSNS (5), /* fp */
996 COSTS_N_INSNS (5), /* dmul */
997 COSTS_N_INSNS (21), /* sdiv */
998 COSTS_N_INSNS (35), /* ddiv */
999 32, /* cache line size */
1001 1024, /* l2 cache */
1003 0, /* SF->DF convert */
1006 /* Instruction costs on PPC8540 processors. */
1008 struct processor_costs ppc8540_cost = {
1009 COSTS_N_INSNS (4), /* mulsi */
1010 COSTS_N_INSNS (4), /* mulsi_const */
1011 COSTS_N_INSNS (4), /* mulsi_const9 */
1012 COSTS_N_INSNS (4), /* muldi */
1013 COSTS_N_INSNS (19), /* divsi */
1014 COSTS_N_INSNS (19), /* divdi */
1015 COSTS_N_INSNS (4), /* fp */
1016 COSTS_N_INSNS (4), /* dmul */
1017 COSTS_N_INSNS (29), /* sdiv */
1018 COSTS_N_INSNS (29), /* ddiv */
1019 32, /* cache line size */
1022 1, /* prefetch streams /*/
1023 0, /* SF->DF convert */
1026 /* Instruction costs on E300C2 and E300C3 cores. */
1028 struct processor_costs ppce300c2c3_cost = {
1029 COSTS_N_INSNS (4), /* mulsi */
1030 COSTS_N_INSNS (4), /* mulsi_const */
1031 COSTS_N_INSNS (4), /* mulsi_const9 */
1032 COSTS_N_INSNS (4), /* muldi */
1033 COSTS_N_INSNS (19), /* divsi */
1034 COSTS_N_INSNS (19), /* divdi */
1035 COSTS_N_INSNS (3), /* fp */
1036 COSTS_N_INSNS (4), /* dmul */
1037 COSTS_N_INSNS (18), /* sdiv */
1038 COSTS_N_INSNS (33), /* ddiv */
1042 1, /* prefetch streams */
1043 0, /* SF->DF convert */
1046 /* Instruction costs on PPCE500MC processors. */
1048 struct processor_costs ppce500mc_cost = {
1049 COSTS_N_INSNS (4), /* mulsi */
1050 COSTS_N_INSNS (4), /* mulsi_const */
1051 COSTS_N_INSNS (4), /* mulsi_const9 */
1052 COSTS_N_INSNS (4), /* muldi */
1053 COSTS_N_INSNS (14), /* divsi */
1054 COSTS_N_INSNS (14), /* divdi */
1055 COSTS_N_INSNS (8), /* fp */
1056 COSTS_N_INSNS (10), /* dmul */
1057 COSTS_N_INSNS (36), /* sdiv */
1058 COSTS_N_INSNS (66), /* ddiv */
1059 64, /* cache line size */
1062 1, /* prefetch streams */
1063 0, /* SF->DF convert */
1066 /* Instruction costs on PPCE500MC64 processors. */
1068 struct processor_costs ppce500mc64_cost = {
1069 COSTS_N_INSNS (4), /* mulsi */
1070 COSTS_N_INSNS (4), /* mulsi_const */
1071 COSTS_N_INSNS (4), /* mulsi_const9 */
1072 COSTS_N_INSNS (4), /* muldi */
1073 COSTS_N_INSNS (14), /* divsi */
1074 COSTS_N_INSNS (14), /* divdi */
1075 COSTS_N_INSNS (4), /* fp */
1076 COSTS_N_INSNS (10), /* dmul */
1077 COSTS_N_INSNS (36), /* sdiv */
1078 COSTS_N_INSNS (66), /* ddiv */
1079 64, /* cache line size */
1082 1, /* prefetch streams */
1083 0, /* SF->DF convert */
1086 /* Instruction costs on PPCE5500 processors. */
1088 struct processor_costs ppce5500_cost = {
1089 COSTS_N_INSNS (5), /* mulsi */
1090 COSTS_N_INSNS (5), /* mulsi_const */
1091 COSTS_N_INSNS (4), /* mulsi_const9 */
1092 COSTS_N_INSNS (5), /* muldi */
1093 COSTS_N_INSNS (14), /* divsi */
1094 COSTS_N_INSNS (14), /* divdi */
1095 COSTS_N_INSNS (7), /* fp */
1096 COSTS_N_INSNS (10), /* dmul */
1097 COSTS_N_INSNS (36), /* sdiv */
1098 COSTS_N_INSNS (66), /* ddiv */
1099 64, /* cache line size */
1102 1, /* prefetch streams */
1103 0, /* SF->DF convert */
1106 /* Instruction costs on PPCE6500 processors. */
1108 struct processor_costs ppce6500_cost = {
1109 COSTS_N_INSNS (5), /* mulsi */
1110 COSTS_N_INSNS (5), /* mulsi_const */
1111 COSTS_N_INSNS (4), /* mulsi_const9 */
1112 COSTS_N_INSNS (5), /* muldi */
1113 COSTS_N_INSNS (14), /* divsi */
1114 COSTS_N_INSNS (14), /* divdi */
1115 COSTS_N_INSNS (7), /* fp */
1116 COSTS_N_INSNS (10), /* dmul */
1117 COSTS_N_INSNS (36), /* sdiv */
1118 COSTS_N_INSNS (66), /* ddiv */
1119 64, /* cache line size */
1122 1, /* prefetch streams */
1123 0, /* SF->DF convert */
1126 /* Instruction costs on AppliedMicro Titan processors. */
1128 struct processor_costs titan_cost = {
1129 COSTS_N_INSNS (5), /* mulsi */
1130 COSTS_N_INSNS (5), /* mulsi_const */
1131 COSTS_N_INSNS (5), /* mulsi_const9 */
1132 COSTS_N_INSNS (5), /* muldi */
1133 COSTS_N_INSNS (18), /* divsi */
1134 COSTS_N_INSNS (18), /* divdi */
1135 COSTS_N_INSNS (10), /* fp */
1136 COSTS_N_INSNS (10), /* dmul */
1137 COSTS_N_INSNS (46), /* sdiv */
1138 COSTS_N_INSNS (72), /* ddiv */
1139 32, /* cache line size */
1142 1, /* prefetch streams */
1143 0, /* SF->DF convert */
1146 /* Instruction costs on POWER4 and POWER5 processors. */
1148 struct processor_costs power4_cost = {
1149 COSTS_N_INSNS (3), /* mulsi */
1150 COSTS_N_INSNS (2), /* mulsi_const */
1151 COSTS_N_INSNS (2), /* mulsi_const9 */
1152 COSTS_N_INSNS (4), /* muldi */
1153 COSTS_N_INSNS (18), /* divsi */
1154 COSTS_N_INSNS (34), /* divdi */
1155 COSTS_N_INSNS (3), /* fp */
1156 COSTS_N_INSNS (3), /* dmul */
1157 COSTS_N_INSNS (17), /* sdiv */
1158 COSTS_N_INSNS (17), /* ddiv */
1159 128, /* cache line size */
1161 1024, /* l2 cache */
1162 8, /* prefetch streams */
1163 0, /* SF->DF convert */
1166 /* Instruction costs on POWER6 processors. */
1168 struct processor_costs power6_cost = {
1169 COSTS_N_INSNS (8), /* mulsi */
1170 COSTS_N_INSNS (8), /* mulsi_const */
1171 COSTS_N_INSNS (8), /* mulsi_const9 */
1172 COSTS_N_INSNS (8), /* muldi */
1173 COSTS_N_INSNS (22), /* divsi */
1174 COSTS_N_INSNS (28), /* divdi */
1175 COSTS_N_INSNS (3), /* fp */
1176 COSTS_N_INSNS (3), /* dmul */
1177 COSTS_N_INSNS (13), /* sdiv */
1178 COSTS_N_INSNS (16), /* ddiv */
1179 128, /* cache line size */
1181 2048, /* l2 cache */
1182 16, /* prefetch streams */
1183 0, /* SF->DF convert */
/* NOTE(review): initializer appears truncated here (l1 cache and closing
   brace missing) — verify against upstream rs6000.c. */
1186 /* Instruction costs on POWER7 processors. */
1188 struct processor_costs power7_cost = {
1189 COSTS_N_INSNS (2), /* mulsi */
1190 COSTS_N_INSNS (2), /* mulsi_const */
1191 COSTS_N_INSNS (2), /* mulsi_const9 */
1192 COSTS_N_INSNS (2), /* muldi */
1193 COSTS_N_INSNS (18), /* divsi */
1194 COSTS_N_INSNS (34), /* divdi */
1195 COSTS_N_INSNS (3), /* fp */
1196 COSTS_N_INSNS (3), /* dmul */
1197 COSTS_N_INSNS (13), /* sdiv */
1198 COSTS_N_INSNS (16), /* ddiv */
1199 128, /* cache line size */
1202 12, /* prefetch streams */
1203 COSTS_N_INSNS (3), /* SF->DF convert */
/* NOTE(review): initializer appears truncated here (l1/l2 cache and closing
   brace missing) — verify against upstream rs6000.c. */
1206 /* Instruction costs on POWER8 processors. */
1208 struct processor_costs power8_cost = {
1209 COSTS_N_INSNS (3), /* mulsi */
1210 COSTS_N_INSNS (3), /* mulsi_const */
1211 COSTS_N_INSNS (3), /* mulsi_const9 */
1212 COSTS_N_INSNS (3), /* muldi */
1213 COSTS_N_INSNS (19), /* divsi */
1214 COSTS_N_INSNS (35), /* divdi */
1215 COSTS_N_INSNS (3), /* fp */
1216 COSTS_N_INSNS (3), /* dmul */
1217 COSTS_N_INSNS (14), /* sdiv */
1218 COSTS_N_INSNS (17), /* ddiv */
1219 128, /* cache line size */
1222 12, /* prefetch streams */
1223 COSTS_N_INSNS (3), /* SF->DF convert */
/* NOTE(review): initializer appears truncated here (l1/l2 cache and closing
   brace missing) — verify against upstream rs6000.c. */
1226 /* Instruction costs on POWER9 processors. */
1228 struct processor_costs power9_cost = {
1229 COSTS_N_INSNS (3), /* mulsi */
1230 COSTS_N_INSNS (3), /* mulsi_const */
1231 COSTS_N_INSNS (3), /* mulsi_const9 */
1232 COSTS_N_INSNS (3), /* muldi */
1233 COSTS_N_INSNS (8), /* divsi */
1234 COSTS_N_INSNS (12), /* divdi */
1235 COSTS_N_INSNS (3), /* fp */
1236 COSTS_N_INSNS (3), /* dmul */
1237 COSTS_N_INSNS (13), /* sdiv */
1238 COSTS_N_INSNS (18), /* ddiv */
1239 128, /* cache line size */
1242 8, /* prefetch streams */
1243 COSTS_N_INSNS (3), /* SF->DF convert */
/* NOTE(review): initializer appears truncated here (l1/l2 cache and closing
   brace missing) — verify against upstream rs6000.c. */
1246 /* Instruction costs on POWER A2 processors. */
1248 struct processor_costs ppca2_cost = {
1249 COSTS_N_INSNS (16), /* mulsi */
1250 COSTS_N_INSNS (16), /* mulsi_const */
1251 COSTS_N_INSNS (16), /* mulsi_const9 */
1252 COSTS_N_INSNS (16), /* muldi */
1253 COSTS_N_INSNS (22), /* divsi */
1254 COSTS_N_INSNS (28), /* divdi */
1255 COSTS_N_INSNS (3), /* fp */
1256 COSTS_N_INSNS (3), /* dmul */
1257 COSTS_N_INSNS (59), /* sdiv */
1258 COSTS_N_INSNS (72), /* ddiv */
1261 2048, /* l2 cache */
1262 16, /* prefetch streams */
1263 0, /* SF->DF convert */
/* NOTE(review): initializer appears truncated here (cache line size, l1
   cache, and closing brace missing) — verify against upstream rs6000.c. */
1267 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1268 #undef RS6000_BUILTIN_0
1269 #undef RS6000_BUILTIN_1
1270 #undef RS6000_BUILTIN_2
1271 #undef RS6000_BUILTIN_3
1272 #undef RS6000_BUILTIN_A
1273 #undef RS6000_BUILTIN_D
1274 #undef RS6000_BUILTIN_H
1275 #undef RS6000_BUILTIN_P
1276 #undef RS6000_BUILTIN_X
/* Each RS6000_BUILTIN_* macro is redefined to expand an entry of
   rs6000-builtin.def into one initializer row of rs6000_builtin_info;
   the ENUM argument is deliberately unused here (it names the index). */
1278 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1279 { NAME, ICODE, MASK, ATTR },
1281 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1282 { NAME, ICODE, MASK, ATTR },
1284 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1285 { NAME, ICODE, MASK, ATTR },
1287 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1288 { NAME, ICODE, MASK, ATTR },
1290 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1291 { NAME, ICODE, MASK, ATTR },
1293 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1294 { NAME, ICODE, MASK, ATTR },
1296 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1297 { NAME, ICODE, MASK, ATTR },
1299 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1300 { NAME, ICODE, MASK, ATTR },
1302 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1303 { NAME, ICODE, MASK, ATTR },
/* NOTE(review): the struct below appears truncated in this copy — the
   initializer rows start with NAME, so a `const char *name;` member (and
   the closing braces of the struct and array) seem to be missing here;
   verify against upstream rs6000.c. */
1305 struct rs6000_builtin_info_type {
1307 const enum insn_code icode;
1308 const HOST_WIDE_INT mask;
1309 const unsigned attr;
1312 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1314 #include "rs6000-builtin.def"
1317 #undef RS6000_BUILTIN_0
1318 #undef RS6000_BUILTIN_1
1319 #undef RS6000_BUILTIN_2
1320 #undef RS6000_BUILTIN_3
1321 #undef RS6000_BUILTIN_A
1322 #undef RS6000_BUILTIN_D
1323 #undef RS6000_BUILTIN_H
1324 #undef RS6000_BUILTIN_P
1325 #undef RS6000_BUILTIN_X
1327 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1328 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1331 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1332 static struct machine_function * rs6000_init_machine_status (void);
1333 static int rs6000_ra_ever_killed (void);
1334 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1335 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1336 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1337 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1338 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1339 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1340 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1341 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1343 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1345 static bool is_microcoded_insn (rtx_insn *);
1346 static bool is_nonpipeline_insn (rtx_insn *);
1347 static bool is_cracked_insn (rtx_insn *);
1348 static bool is_load_insn (rtx, rtx *);
1349 static bool is_store_insn (rtx, rtx *);
1350 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1351 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1352 static bool insn_must_be_first_in_group (rtx_insn *);
1353 static bool insn_must_be_last_in_group (rtx_insn *);
1354 static void altivec_init_builtins (void);
1355 static tree builtin_function_type (machine_mode, machine_mode,
1356 machine_mode, machine_mode,
1357 enum rs6000_builtins, const char *name);
1358 static void rs6000_common_init_builtins (void);
1359 static void htm_init_builtins (void);
1360 static rs6000_stack_t *rs6000_stack_info (void);
1361 static void is_altivec_return_reg (rtx, void *);
1362 int easy_vector_constant (rtx, machine_mode);
1363 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1364 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1365 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1368 static void macho_branch_islands (void);
1369 static tree get_prev_label (tree);
1371 static bool rs6000_mode_dependent_address (const_rtx);
1372 static bool rs6000_debug_mode_dependent_address (const_rtx);
1373 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1374 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1376 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1379 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1380 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1382 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1385 static bool rs6000_debug_can_change_mode_class (machine_mode,
1388 static bool rs6000_save_toc_in_prologue_p (void);
1389 static rtx rs6000_internal_arg_pointer (void);
1391 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1392 = rs6000_mode_dependent_address;
1394 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1396 = rs6000_secondary_reload_class;
1398 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1399 = rs6000_preferred_reload_class;
1401 const int INSN_NOT_AVAILABLE = -1;
1403 static void rs6000_print_isa_options (FILE *, int, const char *,
1405 static void rs6000_print_builtin_options (FILE *, int, const char *,
1407 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1409 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1410 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1411 enum rs6000_reg_type,
1413 secondary_reload_info *,
1415 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1416 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
1417 static tree rs6000_fold_builtin (tree, int, tree *, bool);
1419 /* Hash table stuff for keeping track of TOC entries. */
1421 struct GTY((for_user)) toc_hash_struct
1423 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1424 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1426 machine_mode key_mode;
1430 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1432 static hashval_t hash (toc_hash_struct *);
1433 static bool equal (toc_hash_struct *, toc_hash_struct *);
1436 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1438 /* Hash table to keep track of the argument types for builtin functions. */
1440 struct GTY((for_user)) builtin_hash_struct
1443 machine_mode mode[4]; /* return value + 3 arguments. */
1444 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1447 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1449 static hashval_t hash (builtin_hash_struct *);
1450 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1453 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1456 /* Default register names. */
1457 char rs6000_reg_names[][8] =
1460 "0", "1", "2", "3", "4", "5", "6", "7",
1461 "8", "9", "10", "11", "12", "13", "14", "15",
1462 "16", "17", "18", "19", "20", "21", "22", "23",
1463 "24", "25", "26", "27", "28", "29", "30", "31",
1465 "0", "1", "2", "3", "4", "5", "6", "7",
1466 "8", "9", "10", "11", "12", "13", "14", "15",
1467 "16", "17", "18", "19", "20", "21", "22", "23",
1468 "24", "25", "26", "27", "28", "29", "30", "31",
1470 "0", "1", "2", "3", "4", "5", "6", "7",
1471 "8", "9", "10", "11", "12", "13", "14", "15",
1472 "16", "17", "18", "19", "20", "21", "22", "23",
1473 "24", "25", "26", "27", "28", "29", "30", "31",
1475 "lr", "ctr", "ca", "ap",
1477 "0", "1", "2", "3", "4", "5", "6", "7",
1478 /* vrsave vscr sfp */
1479 "vrsave", "vscr", "sfp",
1482 #ifdef TARGET_REGNAMES
1483 static const char alt_reg_names[][8] =
1486 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1487 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1488 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1489 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1491 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1492 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1493 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1494 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1496 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1497 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1498 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1499 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1501 "lr", "ctr", "ca", "ap",
1503 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1504 /* vrsave vscr sfp */
1505 "vrsave", "vscr", "sfp",
1509 /* Table of valid machine attributes. */
1511 static const struct attribute_spec rs6000_attribute_table[] =
1513 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1514 affects_type_identity, handler, exclude } */
1515 { "altivec", 1, 1, false, true, false, false,
1516 rs6000_handle_altivec_attribute, NULL },
1517 { "longcall", 0, 0, false, true, true, false,
1518 rs6000_handle_longcall_attribute, NULL },
1519 { "shortcall", 0, 0, false, true, true, false,
1520 rs6000_handle_longcall_attribute, NULL },
1521 { "ms_struct", 0, 0, false, false, false, false,
1522 rs6000_handle_struct_attribute, NULL },
1523 { "gcc_struct", 0, 0, false, false, false, false,
1524 rs6000_handle_struct_attribute, NULL },
1525 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1526 SUBTARGET_ATTRIBUTE_TABLE,
1528 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1531 #ifndef TARGET_PROFILE_KERNEL
1532 #define TARGET_PROFILE_KERNEL 0
1535 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1536 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
1538 /* Initialize the GCC target structure. */
1539 #undef TARGET_ATTRIBUTE_TABLE
1540 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1541 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1542 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1543 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1544 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1546 #undef TARGET_ASM_ALIGNED_DI_OP
1547 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1549 /* Default unaligned ops are only provided for ELF. Find the ops needed
1550 for non-ELF systems. */
1551 #ifndef OBJECT_FORMAT_ELF
1553 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1555 #undef TARGET_ASM_UNALIGNED_HI_OP
1556 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1557 #undef TARGET_ASM_UNALIGNED_SI_OP
1558 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1559 #undef TARGET_ASM_UNALIGNED_DI_OP
1560 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1563 #undef TARGET_ASM_UNALIGNED_HI_OP
1564 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1565 #undef TARGET_ASM_UNALIGNED_SI_OP
1566 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1567 #undef TARGET_ASM_UNALIGNED_DI_OP
1568 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1569 #undef TARGET_ASM_ALIGNED_DI_OP
1570 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1574 /* This hook deals with fixups for relocatable code and DI-mode objects
1576 #undef TARGET_ASM_INTEGER
1577 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1579 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1580 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1581 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1584 #undef TARGET_SET_UP_BY_PROLOGUE
1585 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1587 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1588 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1589 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1590 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1591 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1592 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1593 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1594 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1595 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1596 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1597 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1598 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1600 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1601 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1603 #undef TARGET_INTERNAL_ARG_POINTER
1604 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1606 #undef TARGET_HAVE_TLS
1607 #define TARGET_HAVE_TLS HAVE_AS_TLS
1609 #undef TARGET_CANNOT_FORCE_CONST_MEM
1610 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1612 #undef TARGET_DELEGITIMIZE_ADDRESS
1613 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1615 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1616 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1618 #undef TARGET_LEGITIMATE_COMBINED_INSN
1619 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1621 #undef TARGET_ASM_FUNCTION_PROLOGUE
1622 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1623 #undef TARGET_ASM_FUNCTION_EPILOGUE
1624 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1626 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1627 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1629 #undef TARGET_LEGITIMIZE_ADDRESS
1630 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1632 #undef TARGET_SCHED_VARIABLE_ISSUE
1633 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1635 #undef TARGET_SCHED_ISSUE_RATE
1636 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1637 #undef TARGET_SCHED_ADJUST_COST
1638 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1639 #undef TARGET_SCHED_ADJUST_PRIORITY
1640 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1641 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1642 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1643 #undef TARGET_SCHED_INIT
1644 #define TARGET_SCHED_INIT rs6000_sched_init
1645 #undef TARGET_SCHED_FINISH
1646 #define TARGET_SCHED_FINISH rs6000_sched_finish
1647 #undef TARGET_SCHED_REORDER
1648 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1649 #undef TARGET_SCHED_REORDER2
1650 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1652 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1653 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1655 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1656 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1658 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1659 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1660 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1661 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1662 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1663 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1664 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1665 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1667 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1668 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1670 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1671 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1672 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1673 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1674 rs6000_builtin_support_vector_misalignment
1675 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1676 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1677 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1678 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1679 rs6000_builtin_vectorization_cost
1680 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1681 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1682 rs6000_preferred_simd_mode
1683 #undef TARGET_VECTORIZE_INIT_COST
1684 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1685 #undef TARGET_VECTORIZE_ADD_STMT_COST
1686 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1687 #undef TARGET_VECTORIZE_FINISH_COST
1688 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1689 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1690 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1692 #undef TARGET_INIT_BUILTINS
1693 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1694 #undef TARGET_BUILTIN_DECL
1695 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1697 #undef TARGET_FOLD_BUILTIN
1698 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1699 #undef TARGET_GIMPLE_FOLD_BUILTIN
1700 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1702 #undef TARGET_EXPAND_BUILTIN
1703 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1705 #undef TARGET_MANGLE_TYPE
1706 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1708 #undef TARGET_INIT_LIBFUNCS
1709 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1712 #undef TARGET_BINDS_LOCAL_P
1713 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1716 #undef TARGET_MS_BITFIELD_LAYOUT_P
1717 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1719 #undef TARGET_ASM_OUTPUT_MI_THUNK
1720 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1722 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1723 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1725 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1726 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1728 #undef TARGET_REGISTER_MOVE_COST
1729 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1730 #undef TARGET_MEMORY_MOVE_COST
1731 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1732 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1733 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1734 rs6000_ira_change_pseudo_allocno_class
1735 #undef TARGET_CANNOT_COPY_INSN_P
1736 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1737 #undef TARGET_RTX_COSTS
1738 #define TARGET_RTX_COSTS rs6000_rtx_costs
1739 #undef TARGET_ADDRESS_COST
1740 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1741 #undef TARGET_INSN_COST
1742 #define TARGET_INSN_COST rs6000_insn_cost
1744 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1745 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1747 #undef TARGET_PROMOTE_FUNCTION_MODE
1748 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1750 #undef TARGET_RETURN_IN_MEMORY
1751 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1753 #undef TARGET_RETURN_IN_MSB
1754 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1759 /* Always strict argument naming on rs6000. */
1760 #undef TARGET_STRICT_ARGUMENT_NAMING
1761 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1762 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1763 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1764 #undef TARGET_SPLIT_COMPLEX_ARG
1765 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1766 #undef TARGET_MUST_PASS_IN_STACK
1767 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1768 #undef TARGET_PASS_BY_REFERENCE
1769 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1770 #undef TARGET_ARG_PARTIAL_BYTES
1771 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1772 #undef TARGET_FUNCTION_ARG_ADVANCE
1773 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1774 #undef TARGET_FUNCTION_ARG
1775 #define TARGET_FUNCTION_ARG rs6000_function_arg
1776 #undef TARGET_FUNCTION_ARG_PADDING
1777 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1778 #undef TARGET_FUNCTION_ARG_BOUNDARY
1779 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1781 #undef TARGET_BUILD_BUILTIN_VA_LIST
1782 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1784 #undef TARGET_EXPAND_BUILTIN_VA_START
1785 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1787 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1788 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1790 #undef TARGET_EH_RETURN_FILTER_MODE
1791 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1793 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1794 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1796 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1797 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1799 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1800 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1802 #undef TARGET_FLOATN_MODE
1803 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1805 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1806 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1808 #undef TARGET_MD_ASM_ADJUST
1809 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1811 #undef TARGET_OPTION_OVERRIDE
1812 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1814 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1815 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1816 rs6000_builtin_vectorized_function
1818 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1819 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1820 rs6000_builtin_md_vectorized_function
1822 #undef TARGET_STACK_PROTECT_GUARD
1823 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1826 #undef TARGET_STACK_PROTECT_FAIL
1827 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1831 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1832 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1835 /* Use a 32-bit anchor range. This leads to sequences like:
1837 addis tmp,anchor,high
1840 where tmp itself acts as an anchor, and can be shared between
1841 accesses to the same 64k page. */
1842 #undef TARGET_MIN_ANCHOR_OFFSET
1843 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1844 #undef TARGET_MAX_ANCHOR_OFFSET
1845 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1846 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1847 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1848 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1849 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1851 #undef TARGET_BUILTIN_RECIPROCAL
1852 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1854 #undef TARGET_SECONDARY_RELOAD
1855 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1856 #undef TARGET_SECONDARY_MEMORY_NEEDED
1857 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1858 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1859 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1861 #undef TARGET_LEGITIMATE_ADDRESS_P
1862 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1864 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1865 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1867 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1868 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1870 #undef TARGET_CAN_ELIMINATE
1871 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1873 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1874 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1876 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1877 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1879 #undef TARGET_TRAMPOLINE_INIT
1880 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1882 #undef TARGET_FUNCTION_VALUE
1883 #define TARGET_FUNCTION_VALUE rs6000_function_value
1885 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1886 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1888 #undef TARGET_OPTION_SAVE
1889 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1891 #undef TARGET_OPTION_RESTORE
1892 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1894 #undef TARGET_OPTION_PRINT
1895 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1897 #undef TARGET_CAN_INLINE_P
1898 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1900 #undef TARGET_SET_CURRENT_FUNCTION
1901 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1903 #undef TARGET_LEGITIMATE_CONSTANT_P
1904 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1906 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1907 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1909 #undef TARGET_CAN_USE_DOLOOP_P
1910 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1912 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1913 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1915 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1916 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1917 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1918 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1919 #undef TARGET_UNWIND_WORD_MODE
1920 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1922 #undef TARGET_OFFLOAD_OPTIONS
1923 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1925 #undef TARGET_C_MODE_FOR_SUFFIX
1926 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1928 #undef TARGET_INVALID_BINARY_OP
1929 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1931 #undef TARGET_OPTAB_SUPPORTED_P
1932 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1934 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1935 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1937 #undef TARGET_COMPARE_VERSION_PRIORITY
1938 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1940 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1941 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1942 rs6000_generate_version_dispatcher_body
1944 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1945 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1946 rs6000_get_function_versions_dispatcher
1948 #undef TARGET_OPTION_FUNCTION_VERSIONS
1949 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1951 #undef TARGET_HARD_REGNO_NREGS
1952 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1953 #undef TARGET_HARD_REGNO_MODE_OK
1954 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1956 #undef TARGET_MODES_TIEABLE_P
1957 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1959 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1960 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1961 rs6000_hard_regno_call_part_clobbered
1963 #undef TARGET_SLOW_UNALIGNED_ACCESS
1964 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1966 #undef TARGET_CAN_CHANGE_MODE_CLASS
1967 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1969 #undef TARGET_CONSTANT_ALIGNMENT
1970 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1972 #undef TARGET_STARTING_FRAME_OFFSET
1973 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1975 #if TARGET_ELF && RS6000_WEAK
1976 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1977 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1980 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1981 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1983 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1984 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1987 /* Processor table. */
1990 const char *const name; /* Canonical processor name. */
1991 const enum processor_type processor; /* Processor type enum value. */
1992 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1995 static struct rs6000_ptt const processor_target_table[] =
1997 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1998 #include "rs6000-cpus.def"
2002 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2006 rs6000_cpu_name_lookup (const char *name)
2012 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2013 if (! strcmp (name, processor_target_table[i].name))
2021 /* Return number of consecutive hard regs needed starting at reg REGNO
2022 to hold something of mode MODE.
2023 This is ordinarily the length in words of a value of mode MODE
2024 but can be less for certain modes in special long registers.
2026 POWER and PowerPC GPRs hold 32 bits worth;
2027 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
2030 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2032 unsigned HOST_WIDE_INT reg_size;
2034 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2035 128-bit floating point that can go in vector registers, which has VSX
2036 memory addressing. */
2037 if (FP_REGNO_P (regno))
2038 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2039 ? UNITS_PER_VSX_WORD
2040 : UNITS_PER_FP_WORD);
2042 else if (ALTIVEC_REGNO_P (regno))
2043 reg_size = UNITS_PER_ALTIVEC_WORD;
2046 reg_size = UNITS_PER_WORD;
2048 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2051 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2054 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2056 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2058 if (COMPLEX_MODE_P (mode))
2059 mode = GET_MODE_INNER (mode);
2061 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2062 register combinations, and use PTImode where we need to deal with quad
2063 word memory operations. Don't allow quad words in the argument or frame
2064 pointer registers, just registers 0..31. */
2065 if (mode == PTImode)
2066 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2067 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2068 && ((regno & 1) == 0));
2070 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2071 implementations. Don't allow an item to be split between a FP register
2072 and an Altivec register. Allow TImode in all VSX registers if the user
2074 if (TARGET_VSX && VSX_REGNO_P (regno)
2075 && (VECTOR_MEM_VSX_P (mode)
2076 || FLOAT128_VECTOR_P (mode)
2077 || reg_addr[mode].scalar_in_vmx_p
2079 || (TARGET_VADDUQM && mode == V1TImode)))
2081 if (FP_REGNO_P (regno))
2082 return FP_REGNO_P (last_regno);
2084 if (ALTIVEC_REGNO_P (regno))
2086 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2089 return ALTIVEC_REGNO_P (last_regno);
2093 /* The GPRs can hold any mode, but values bigger than one register
2094 cannot go past R31. */
2095 if (INT_REGNO_P (regno))
2096 return INT_REGNO_P (last_regno);
2098 /* The float registers (except for VSX vector modes) can only hold floating
2099 modes and DImode. */
2100 if (FP_REGNO_P (regno))
2102 if (FLOAT128_VECTOR_P (mode))
2105 if (SCALAR_FLOAT_MODE_P (mode)
2106 && (mode != TDmode || (regno % 2) == 0)
2107 && FP_REGNO_P (last_regno))
2110 if (GET_MODE_CLASS (mode) == MODE_INT)
2112 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2115 if (TARGET_P8_VECTOR && (mode == SImode))
2118 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2125 /* The CR register can only hold CC modes. */
2126 if (CR_REGNO_P (regno))
2127 return GET_MODE_CLASS (mode) == MODE_CC;
2129 if (CA_REGNO_P (regno))
2130 return mode == Pmode || mode == SImode;
2132 /* AltiVec only in AldyVec registers. */
2133 if (ALTIVEC_REGNO_P (regno))
2134 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2135 || mode == V1TImode);
2137 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2138 and it must be able to fit within the register set. */
2140 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2143 /* Implement TARGET_HARD_REGNO_NREGS. */
2146 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2148 return rs6000_hard_regno_nregs[mode][regno];
2151 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2154 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2156 return rs6000_hard_regno_mode_ok_p[mode][regno];
2159 /* Implement TARGET_MODES_TIEABLE_P.
2161 PTImode cannot tie with other modes because PTImode is restricted to even
2162 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2165 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2166 128-bit floating point on VSX systems ties with other vectors. */
2169 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2171 if (mode1 == PTImode)
2172 return mode2 == PTImode;
2173 if (mode2 == PTImode)
2176 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2177 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2178 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2181 if (SCALAR_FLOAT_MODE_P (mode1))
2182 return SCALAR_FLOAT_MODE_P (mode2);
2183 if (SCALAR_FLOAT_MODE_P (mode2))
2186 if (GET_MODE_CLASS (mode1) == MODE_CC)
2187 return GET_MODE_CLASS (mode2) == MODE_CC;
2188 if (GET_MODE_CLASS (mode2) == MODE_CC)
2194 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2197 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2198 unsigned int regno, machine_mode mode)
2202 && GET_MODE_SIZE (mode) > 4
2203 && INT_REGNO_P (regno))
2207 && FP_REGNO_P (regno)
2208 && GET_MODE_SIZE (mode) > 8
2209 && !FLOAT128_2REG_P (mode))
2215 /* Print interesting facts about registers. */
2217 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2221 for (r = first_regno; r <= last_regno; ++r)
2223 const char *comma = "";
2226 if (first_regno == last_regno)
2227 fprintf (stderr, "%s:\t", reg_name);
2229 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2232 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2233 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2237 fprintf (stderr, ",\n\t");
2242 if (rs6000_hard_regno_nregs[m][r] > 1)
2243 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2244 rs6000_hard_regno_nregs[m][r]);
2246 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2251 if (call_used_regs[r])
2255 fprintf (stderr, ",\n\t");
2260 len += fprintf (stderr, "%s%s", comma, "call-used");
2268 fprintf (stderr, ",\n\t");
2273 len += fprintf (stderr, "%s%s", comma, "fixed");
2279 fprintf (stderr, ",\n\t");
2283 len += fprintf (stderr, "%sreg-class = %s", comma,
2284 reg_class_names[(int)rs6000_regno_regclass[r]]);
2289 fprintf (stderr, ",\n\t");
2293 fprintf (stderr, "%sregno = %d\n", comma, r);
2298 rs6000_debug_vector_unit (enum rs6000_vector v)
2304 case VECTOR_NONE: ret = "none"; break;
2305 case VECTOR_ALTIVEC: ret = "altivec"; break;
2306 case VECTOR_VSX: ret = "vsx"; break;
2307 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2308 default: ret = "unknown"; break;
2314 /* Inner function printing just the address mask for a particular reload
2316 DEBUG_FUNCTION char *
2317 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2322 if ((mask & RELOAD_REG_VALID) != 0)
2324 else if (keep_spaces)
2327 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2329 else if (keep_spaces)
2332 if ((mask & RELOAD_REG_INDEXED) != 0)
2334 else if (keep_spaces)
2337 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2339 else if ((mask & RELOAD_REG_OFFSET) != 0)
2341 else if (keep_spaces)
2344 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2346 else if (keep_spaces)
2349 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2351 else if (keep_spaces)
2354 if ((mask & RELOAD_REG_AND_M16) != 0)
2356 else if (keep_spaces)
2364 /* Print the address masks in a human readble fashion. */
2366 rs6000_debug_print_mode (ssize_t m)
2371 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2372 for (rc = 0; rc < N_RELOAD_REG; rc++)
2373 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2374 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2376 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2377 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2379 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2380 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2381 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2385 spaces += sizeof (" Reload=sl") - 1;
2387 if (reg_addr[m].scalar_in_vmx_p)
2389 fprintf (stderr, "%*s Upper=y", spaces, "");
2393 spaces += sizeof (" Upper=y") - 1;
2395 if (rs6000_vector_unit[m] != VECTOR_NONE
2396 || rs6000_vector_mem[m] != VECTOR_NONE)
2398 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2400 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2401 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2404 fputs ("\n", stderr);
/* Format strings for the -mdebug=reg dump: a left-justified 32-column name
   followed by "= " and a decimal, hex HOST_WIDE_INT, or string value.  */
2407 #define DEBUG_FMT_ID "%-32s= "
2408 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2409 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2410 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
/* NOTE(review): this chunk is a lossy extraction -- each line still carries
   its original line number and many lines (declarations, array initializers,
   braces, switch skeletons) are missing, so only comments are added here;
   the code text itself is left byte-for-byte untouched.  */
2412 /* Print various interesting information with -mdebug=reg. */
2414 rs6000_debug_reg_global (void)
2416 static const char *const tf[2] = { "false", "true" };
2417 const char *nl = (const char *)0;
2420 char costly_num[20];
2422 char flags_buffer[40];
2423 const char *costly_str;
2424 const char *nop_str;
2425 const char *trace_str;
2426 const char *abi_str;
2427 const char *cmodel_str;
2428 struct cl_target_option cl_opts;
2430 /* Modes we want tieable information on. */
/* NOTE(review): the initializer of print_tieable_modes (orig lines
   2432-2463) is missing from this extraction.  */
2431 static const machine_mode print_tieable_modes[] = {
2465 /* Virtual regs we are interested in. */
2466 const static struct {
2467 int regno; /* register number. */
2468 const char *name; /* register name. */
2469 } virtual_regs[] = {
2470 { STACK_POINTER_REGNUM, "stack pointer:" },
2471 { TOC_REGNUM, "toc: " },
2472 { STATIC_CHAIN_REGNUM, "static chain: " },
2473 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2474 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2475 { ARG_POINTER_REGNUM, "arg pointer: " },
2476 { FRAME_POINTER_REGNUM, "frame pointer:" },
2477 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2478 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2479 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2480 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2481 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2482 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2483 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2484 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2485 { LAST_VIRTUAL_REGISTER, "last virtual: " },
/* Dump each hard-register file via rs6000_debug_reg_print.  */
2488 fputs ("\nHard register information:\n", stderr);
2489 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2490 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2491 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2494 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2495 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2496 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2497 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2498 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2499 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2501 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2502 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2503 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
/* Dump the register class chosen for each machine-specific constraint.  */
2507 "d reg_class = %s\n"
2508 "f reg_class = %s\n"
2509 "v reg_class = %s\n"
2510 "wa reg_class = %s\n"
2511 "wb reg_class = %s\n"
2512 "wd reg_class = %s\n"
2513 "we reg_class = %s\n"
2514 "wf reg_class = %s\n"
2515 "wg reg_class = %s\n"
2516 "wh reg_class = %s\n"
2517 "wi reg_class = %s\n"
2518 "wj reg_class = %s\n"
2519 "wk reg_class = %s\n"
2520 "wl reg_class = %s\n"
2521 "wm reg_class = %s\n"
2522 "wp reg_class = %s\n"
2523 "wq reg_class = %s\n"
2524 "wr reg_class = %s\n"
2525 "ws reg_class = %s\n"
2526 "wt reg_class = %s\n"
2527 "wu reg_class = %s\n"
2528 "wv reg_class = %s\n"
2529 "ww reg_class = %s\n"
2530 "wx reg_class = %s\n"
2531 "wy reg_class = %s\n"
2532 "wz reg_class = %s\n"
2533 "wA reg_class = %s\n"
2534 "wH reg_class = %s\n"
2535 "wI reg_class = %s\n"
2536 "wJ reg_class = %s\n"
2537 "wK reg_class = %s\n"
2539 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2540 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2541 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2542 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2543 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2544 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2545 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2546 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2547 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2548 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2549 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2550 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2551 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2552 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2553 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2554 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2555 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2556 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2557 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2558 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2559 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2560 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2561 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2562 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2563 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2564 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2565 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2566 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2567 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2568 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2569 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
/* Per-mode addressing masks, then which modes can share registers.  */
2572 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2573 rs6000_debug_print_mode (m);
2575 fputs ("\n", stderr);
2577 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2579 machine_mode mode1 = print_tieable_modes[m1];
2580 bool first_time = true;
2582 nl = (const char *)0;
2583 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2585 machine_mode mode2 = print_tieable_modes[m2];
2586 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2590 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2595 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2600 fputs ("\n", stderr);
/* Reciprocal-estimate settings, only when -mrecip is in effect.  */
2606 if (rs6000_recip_control)
2608 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2610 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2611 if (rs6000_recip_bits[m])
2614 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2616 (RS6000_RECIP_AUTO_RE_P (m)
2618 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2619 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2621 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2624 fputs ("\n", stderr);
/* Selected -mcpu=/-mtune= entries and the resulting ISA flag words.  */
2627 if (rs6000_cpu_index >= 0)
2629 const char *name = processor_target_table[rs6000_cpu_index].name;
2631 = processor_target_table[rs6000_cpu_index].target_enable;
2633 sprintf (flags_buffer, "-mcpu=%s flags", name);
2634 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2637 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2639 if (rs6000_tune_index >= 0)
2641 const char *name = processor_target_table[rs6000_tune_index].name;
2643 = processor_target_table[rs6000_tune_index].target_enable;
2645 sprintf (flags_buffer, "-mtune=%s flags", name);
2646 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2649 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2651 cl_target_option_save (&cl_opts, &global_options);
2652 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2655 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2656 rs6000_isa_flags_explicit);
2658 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2659 rs6000_builtin_mask);
2661 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2663 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2664 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
/* Translate the scheduler/ABI enum options into readable strings.  */
2666 switch (rs6000_sched_costly_dep)
2668 case max_dep_latency:
2669 costly_str = "max_dep_latency";
2673 costly_str = "no_dep_costly";
2676 case all_deps_costly:
2677 costly_str = "all_deps_costly";
2680 case true_store_to_load_dep_costly:
2681 costly_str = "true_store_to_load_dep_costly";
2684 case store_to_load_dep_costly:
2685 costly_str = "store_to_load_dep_costly";
2689 costly_str = costly_num;
2690 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2694 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2696 switch (rs6000_sched_insert_nops)
2698 case sched_finish_regroup_exact:
2699 nop_str = "sched_finish_regroup_exact";
2702 case sched_finish_pad_groups:
2703 nop_str = "sched_finish_pad_groups";
2706 case sched_finish_none:
2707 nop_str = "sched_finish_none";
2712 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2716 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2718 switch (rs6000_sdata)
2725 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2729 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2733 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2738 switch (rs6000_traceback)
2740 case traceback_default: trace_str = "default"; break;
2741 case traceback_none: trace_str = "none"; break;
2742 case traceback_part: trace_str = "part"; break;
2743 case traceback_full: trace_str = "full"; break;
2744 default: trace_str = "unknown"; break;
2747 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2749 switch (rs6000_current_cmodel)
2751 case CMODEL_SMALL: cmodel_str = "small"; break;
2752 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2753 case CMODEL_LARGE: cmodel_str = "large"; break;
2754 default: cmodel_str = "unknown"; break;
2757 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2759 switch (rs6000_current_abi)
2761 case ABI_NONE: abi_str = "none"; break;
2762 case ABI_AIX: abi_str = "aix"; break;
2763 case ABI_ELFv2: abi_str = "ELFv2"; break;
2764 case ABI_V4: abi_str = "V4"; break;
2765 case ABI_DARWIN: abi_str = "darwin"; break;
2766 default: abi_str = "unknown"; break;
2769 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
/* Miscellaneous boolean and numeric target settings.  */
2771 if (rs6000_altivec_abi)
2772 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2774 if (rs6000_darwin64_abi)
2775 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2777 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2778 (TARGET_SOFT_FLOAT ? "true" : "false"));
2780 if (TARGET_LINK_STACK)
2781 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2783 if (TARGET_P8_FUSION)
2787 strcpy (options, "power8");
2788 if (TARGET_P8_FUSION_SIGN)
2789 strcat (options, ", sign");
2791 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2794 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2795 TARGET_SECURE_PLT ? "secure" : "bss");
2796 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2797 aix_struct_return ? "aix" : "sysv");
2798 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2799 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2800 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2801 tf[!!rs6000_align_branch_targets]);
2802 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2803 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2804 rs6000_long_double_type_size);
2805 if (rs6000_long_double_type_size > 64)
2807 fprintf (stderr, DEBUG_FMT_S, "long double type",
2808 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2809 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2810 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2812 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2813 (int)rs6000_sched_restricted_insns_priority);
2814 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2816 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2817 (int)RS6000_BUILTIN_COUNT);
2819 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2820 (int)TARGET_FLOAT128_ENABLE_TYPE);
2823 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2824 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2826 if (TARGET_DIRECT_MOVE_128)
2827 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2828 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
/* NOTE(review): lossy extraction -- lines keep their original numbers and
   several structural lines (braces, else arms, condition heads) are missing.
   Only comments are added; the code text is untouched.  */
2832 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2833 legitimate address support to figure out the appropriate addressing to
2837 rs6000_setup_reg_addr_masks (void)
2839 ssize_t rc, reg, m, nregs;
2840 addr_mask_type any_addr_mask, addr_mask;
/* Outer loop: compute an address mask per machine mode.  */
2842 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2844 machine_mode m2 = (machine_mode) m;
2845 bool complex_p = false;
2846 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2849 if (COMPLEX_MODE_P (m2))
2852 m2 = GET_MODE_INNER (m2);
2855 msize = GET_MODE_SIZE (m2);
2857 /* SDmode is special in that we want to access it only via REG+REG
2858 addressing on power7 and above, since we want to use the LFIWZX and
2859 STFIWZX instructions to load it. */
2860 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
/* Inner loop: one mask per reload register class (GPR/FPR/VMX).  */
2863 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2866 reg = reload_reg_map[rc].reg;
2868 /* Can mode values go in the GPR/FPR/Altivec registers? */
2869 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2871 bool small_int_vsx_p = (small_int_p
2872 && (rc == RELOAD_REG_FPR
2873 || rc == RELOAD_REG_VMX));
2875 nregs = rs6000_hard_regno_nregs[m][reg];
2876 addr_mask |= RELOAD_REG_VALID;
2878 /* Indicate if the mode takes more than 1 physical register. If
2879 it takes a single register, indicate it can do REG+REG
2880 addressing. Small integers in VSX registers can only do
2881 REG+REG addressing. */
2882 if (small_int_vsx_p)
2883 addr_mask |= RELOAD_REG_INDEXED;
2884 else if (nregs > 1 || m == BLKmode || complex_p)
2885 addr_mask |= RELOAD_REG_MULTIPLE;
2887 addr_mask |= RELOAD_REG_INDEXED;
2889 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2890 addressing. If we allow scalars into Altivec registers,
2891 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2893 For VSX systems, we don't allow update addressing for
2894 DFmode/SFmode if those registers can go in both the
2895 traditional floating point registers and Altivec registers.
2896 The load/store instructions for the Altivec registers do not
2897 have update forms. If we allowed update addressing, it seems
2898 to break IV-OPT code using floating point if the index type is
2899 int instead of long (PR target/81550 and target/84042). */
2902 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2904 && !VECTOR_MODE_P (m2)
2905 && !FLOAT128_VECTOR_P (m2)
2907 && (m != E_DFmode || !TARGET_VSX)
2908 && (m != E_SFmode || !TARGET_P8_VECTOR)
2909 && !small_int_vsx_p
2911 addr_mask |= RELOAD_REG_PRE_INCDEC;
2913 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2914 we don't allow PRE_MODIFY for some multi-register
2919 addr_mask |= RELOAD_REG_PRE_MODIFY;
2923 if (TARGET_POWERPC64)
2924 addr_mask |= RELOAD_REG_PRE_MODIFY;
2929 if (TARGET_HARD_FLOAT)
2930 addr_mask |= RELOAD_REG_PRE_MODIFY;
2936 /* GPR and FPR registers can do REG+OFFSET addressing, except
2937 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2938 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2939 if ((addr_mask != 0) && !indexed_only_p
2941 && (rc == RELOAD_REG_GPR
2942 || ((msize == 8 || m2 == SFmode)
2943 && (rc == RELOAD_REG_FPR
2944 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2945 addr_mask |= RELOAD_REG_OFFSET;
2947 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2948 instructions are enabled. The offset for 128-bit VSX registers is
2949 only 12-bits. While GPRs can handle the full offset range, VSX
2950 registers can only handle the restricted range. */
2951 else if ((addr_mask != 0) && !indexed_only_p
2952 && msize == 16 && TARGET_P9_VECTOR
2953 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2954 || (m2 == TImode && TARGET_VSX)))
2956 addr_mask |= RELOAD_REG_OFFSET;
2957 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2958 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2961 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2962 addressing on 128-bit types. */
2963 if (rc == RELOAD_REG_VMX && msize == 16
2964 && (addr_mask & RELOAD_REG_VALID) != 0)
2965 addr_mask |= RELOAD_REG_AND_M16;
/* Record the per-class mask and accumulate the union for RELOAD_REG_ANY.  */
2967 reg_addr[m].addr_mask[rc] = addr_mask;
2968 any_addr_mask |= addr_mask;
2971 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2976 /* Initialize the various global tables that are based on register size. */
2978 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2984 /* Precalculate REGNO_REG_CLASS. */
2985 rs6000_regno_regclass[0] = GENERAL_REGS;
2986 for (r = 1; r < 32; ++r)
2987 rs6000_regno_regclass[r] = BASE_REGS;
2989 for (r = 32; r < 64; ++r)
2990 rs6000_regno_regclass[r] = FLOAT_REGS;
2992 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2993 rs6000_regno_regclass[r] = NO_REGS;
2995 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2996 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2998 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2999 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3000 rs6000_regno_regclass[r] = CR_REGS;
3002 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3003 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3004 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3005 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3006 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3007 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3008 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3010 /* Precalculate register class to simpler reload register class. We don't
3011 need all of the register classes that are combinations of different
3012 classes, just the simple ones that have constraint letters. */
3013 for (c = 0; c < N_REG_CLASSES; c++)
3014 reg_class_to_reg_type[c] = NO_REG_TYPE;
3016 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3017 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3018 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3019 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3020 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3021 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3022 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3023 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3024 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3025 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3029 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3030 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3034 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3035 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3038 /* Precalculate the valid memory formats as well as the vector information,
3039 this must be set up before the rs6000_hard_regno_nregs_internal calls
3041 gcc_assert ((int)VECTOR_NONE == 0);
3042 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3043 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3045 gcc_assert ((int)CODE_FOR_nothing == 0);
3046 memset ((void *) ®_addr[0], '\0', sizeof (reg_addr));
3048 gcc_assert ((int)NO_REGS == 0);
3049 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3051 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
3052 believes it can use native alignment or still uses 128-bit alignment. */
3053 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3064 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3065 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3066 if (TARGET_FLOAT128_TYPE)
3068 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3069 rs6000_vector_align[KFmode] = 128;
3071 if (FLOAT128_IEEE_P (TFmode))
3073 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3074 rs6000_vector_align[TFmode] = 128;
3078 /* V2DF mode, VSX only. */
3081 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3082 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3083 rs6000_vector_align[V2DFmode] = align64;
3086 /* V4SF mode, either VSX or Altivec. */
3089 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3090 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3091 rs6000_vector_align[V4SFmode] = align32;
3093 else if (TARGET_ALTIVEC)
3095 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3096 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3097 rs6000_vector_align[V4SFmode] = align32;
3100 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3104 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3105 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3106 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3107 rs6000_vector_align[V4SImode] = align32;
3108 rs6000_vector_align[V8HImode] = align32;
3109 rs6000_vector_align[V16QImode] = align32;
3113 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3114 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3115 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3119 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3120 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3121 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3125 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3126 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3129 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3130 rs6000_vector_unit[V2DImode]
3131 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3132 rs6000_vector_align[V2DImode] = align64;
3134 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3135 rs6000_vector_unit[V1TImode]
3136 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3137 rs6000_vector_align[V1TImode] = 128;
3140 /* DFmode, see if we want to use the VSX unit. Memory is handled
3141 differently, so don't set rs6000_vector_mem. */
3144 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3145 rs6000_vector_align[DFmode] = 64;
3148 /* SFmode, see if we want to use the VSX unit. */
3149 if (TARGET_P8_VECTOR)
3151 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3152 rs6000_vector_align[SFmode] = 32;
3155 /* Allow TImode in VSX register and set the VSX memory macros. */
3158 rs6000_vector_mem[TImode] = VECTOR_VSX;
3159 rs6000_vector_align[TImode] = align64;
3162 /* Register class constraints for the constraints that depend on compile
3163 switches. When the VSX code was added, different constraints were added
3164 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3165 of the VSX registers are used. The register classes for scalar floating
3166 point types is set, based on whether we allow that type into the upper
3167 (Altivec) registers. GCC has register classes to target the Altivec
3168 registers for load/store operations, to select using a VSX memory
3169 operation instead of the traditional floating point operation. The
3172 d - Register class to use with traditional DFmode instructions.
3173 f - Register class to use with traditional SFmode instructions.
3174 v - Altivec register.
3175 wa - Any VSX register.
3176 wc - Reserved to represent individual CR bits (used in LLVM).
3177 wd - Preferred register class for V2DFmode.
3178 wf - Preferred register class for V4SFmode.
3179 wg - Float register for power6x move insns.
3180 wh - FP register for direct move instructions.
3181 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3182 wj - FP or VSX register to hold 64-bit integers for direct moves.
3183 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3184 wl - Float register if we can do 32-bit signed int loads.
3185 wm - VSX register for ISA 2.07 direct move operations.
3186 wn - always NO_REGS.
3187 wr - GPR if 64-bit mode is permitted.
3188 ws - Register class to do ISA 2.06 DF operations.
3189 wt - VSX register for TImode in VSX registers.
3190 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3191 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3192 ww - Register class to do SF conversions in with VSX operations.
3193 wx - Float register if we can do 32-bit int stores.
3194 wy - Register class to do ISA 2.07 SF operations.
3195 wz - Float register if we can do 32-bit unsigned int loads.
3196 wH - Altivec register if SImode is allowed in VSX registers.
3197 wI - Float register if SImode is allowed in VSX registers.
3198 wJ - Float register if QImode/HImode are allowed in VSX registers.
3199 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3201 if (TARGET_HARD_FLOAT)
3203 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3204 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3209 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3210 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3211 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3212 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3213 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3214 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3215 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3218 /* Add conditional constraints based on various options, to allow us to
3219 collapse multiple insn patterns. */
3221 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3223 if (TARGET_MFPGPR) /* DFmode */
3224 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3227 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3229 if (TARGET_DIRECT_MOVE)
3231 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3232 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3233 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3234 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3235 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3236 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3239 if (TARGET_POWERPC64)
3241 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3242 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3245 if (TARGET_P8_VECTOR) /* SFmode */
3247 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3248 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3249 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3251 else if (TARGET_VSX)
3252 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3255 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3258 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3260 if (TARGET_FLOAT128_TYPE)
3262 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3263 if (FLOAT128_IEEE_P (TFmode))
3264 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3267 /* Support for new D-form instructions. */
3268 if (TARGET_P9_VECTOR)
3269 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3271 /* Support for new direct moves (ISA 3.0 + 64bit). */
3272 if (TARGET_DIRECT_MOVE_128)
3273 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3275 /* Support small integers in VSX registers. */
3276 if (TARGET_P8_VECTOR)
3278 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3279 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3280 if (TARGET_P9_VECTOR)
3282 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3283 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3287 /* Set up the reload helper and direct move functions. */
3288 if (TARGET_VSX || TARGET_ALTIVEC)
3292 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3293 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3294 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3295 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3296 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3297 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3298 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3299 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3300 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3301 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3302 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3303 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3304 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3305 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3306 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3307 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3308 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3309 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3310 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3311 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3313 if (FLOAT128_VECTOR_P (KFmode))
3315 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3316 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3319 if (FLOAT128_VECTOR_P (TFmode))
3321 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3322 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3325 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3327 if (TARGET_NO_SDMODE_STACK)
3329 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3330 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3335 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3336 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3339 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3341 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3342 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3343 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3344 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3345 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3346 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3347 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3348 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3349 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3351 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3352 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3353 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3354 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3355 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3356 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3357 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3358 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3359 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3361 if (FLOAT128_VECTOR_P (KFmode))
3363 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3364 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3367 if (FLOAT128_VECTOR_P (TFmode))
3369 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3370 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3376 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3377 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3378 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3379 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3380 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3381 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3382 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3383 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3384 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3385 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3386 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3387 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3388 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3389 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3390 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3391 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3392 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3393 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3394 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3395 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3397 if (FLOAT128_VECTOR_P (KFmode))
3399 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3400 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3403 if (FLOAT128_IEEE_P (TFmode))
3405 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3406 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3409 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3411 if (TARGET_NO_SDMODE_STACK)
3413 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3414 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3419 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3420 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3423 if (TARGET_DIRECT_MOVE)
3425 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3426 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3427 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3431 reg_addr[DFmode].scalar_in_vmx_p = true;
3432 reg_addr[DImode].scalar_in_vmx_p = true;
3434 if (TARGET_P8_VECTOR)
3436 reg_addr[SFmode].scalar_in_vmx_p = true;
3437 reg_addr[SImode].scalar_in_vmx_p = true;
3439 if (TARGET_P9_VECTOR)
3441 reg_addr[HImode].scalar_in_vmx_p = true;
3442 reg_addr[QImode].scalar_in_vmx_p = true;
3447 /* Precalculate HARD_REGNO_NREGS. */
3448 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3449 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3450 rs6000_hard_regno_nregs[m][r]
3451 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3453 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3454 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3455 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3456 rs6000_hard_regno_mode_ok_p[m][r]
3457 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3459 /* Precalculate CLASS_MAX_NREGS sizes. */
3460 for (c = 0; c < LIM_REG_CLASSES; ++c)
3464 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3465 reg_size = UNITS_PER_VSX_WORD;
3467 else if (c == ALTIVEC_REGS)
3468 reg_size = UNITS_PER_ALTIVEC_WORD;
3470 else if (c == FLOAT_REGS)
3471 reg_size = UNITS_PER_FP_WORD;
3474 reg_size = UNITS_PER_WORD;
3476 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3478 machine_mode m2 = (machine_mode)m;
3479 int reg_size2 = reg_size;
3481 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3483 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3484 reg_size2 = UNITS_PER_FP_WORD;
3486 rs6000_class_max_nregs[m][c]
3487 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3491 /* Calculate which modes to automatically generate code to use the
3492 reciprocal divide and square root instructions. In the future, possibly
3493 automatically generate the instructions even if the user did not specify
3494 -mrecip. The older machines double precision reciprocal sqrt estimate is
3495 not accurate enough. */
3496 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3498 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3500 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3501 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3502 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3503 if (VECTOR_UNIT_VSX_P (V2DFmode))
3504 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3506 if (TARGET_FRSQRTES)
3507 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3509 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3510 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3511 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3512 if (VECTOR_UNIT_VSX_P (V2DFmode))
3513 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3515 if (rs6000_recip_control)
3517 if (!flag_finite_math_only)
3518 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3520 if (flag_trapping_math)
3521 warning (0, "%qs requires %qs or %qs", "-mrecip",
3522 "-fno-trapping-math", "-ffast-math");
3523 if (!flag_reciprocal_math)
3524 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3526 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3528 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3529 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3530 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3532 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3533 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3534 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3536 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3537 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3538 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3540 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3541 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3542 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3544 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3545 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3546 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3548 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3549 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3550 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3552 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3553 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3554 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3556 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3557 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3558 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3562 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3563 legitimate address support to figure out the appropriate addressing to
3565 rs6000_setup_reg_addr_masks ();
3567 if (global_init_p || TARGET_DEBUG_TARGET)
3569 if (TARGET_DEBUG_REG)
3570 rs6000_debug_reg_global ();
3572 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3574 "SImode variable mult cost = %d\n"
3575 "SImode constant mult cost = %d\n"
3576 "SImode short constant mult cost = %d\n"
3577 "DImode multipliciation cost = %d\n"
3578 "SImode division cost = %d\n"
3579 "DImode division cost = %d\n"
3580 "Simple fp operation cost = %d\n"
3581 "DFmode multiplication cost = %d\n"
3582 "SFmode division cost = %d\n"
3583 "DFmode division cost = %d\n"
3584 "cache line size = %d\n"
3585 "l1 cache size = %d\n"
3586 "l2 cache size = %d\n"
3587 "simultaneous prefetches = %d\n"
3590 rs6000_cost->mulsi_const,
3591 rs6000_cost->mulsi_const9,
3599 rs6000_cost->cache_line_size,
3600 rs6000_cost->l1_cache_size,
3601 rs6000_cost->l2_cache_size,
3602 rs6000_cost->simultaneous_prefetches);
3607 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3610 darwin_rs6000_override_options (void)
/* Force the AltiVec ABI and VRSAVE bookkeeping on: Darwin always uses
   them, and records that we are compiling for the Darwin ABI.  */
3612 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3614 rs6000_altivec_abi = 1;
3615 TARGET_ALTIVEC_VRSAVE = 1;
3616 rs6000_current_abi = ABI_DARWIN;
3618 if (DEFAULT_ABI == ABI_DARWIN
3620 darwin_one_byte_bool = 1;
/* -m64 requires a PowerPC64 CPU: silently enable OPTION_MASK_POWERPC64
   but warn the user that we did so.  */
3622 if (TARGET_64BIT && ! TARGET_POWERPC64)
3624 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3625 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
/* NOTE(review): the condition guarding the next two statements is not
   visible in this excerpt (lines appear elided) -- confirm against the
   full source which configuration selects long calls and soft float.  */
3629 rs6000_default_long_calls = 1;
3630 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3633 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3635 if (!flag_mkernel && !flag_apple_kext
3637 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3638 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3640 /* Unless the user (not the configurer) has explicitly overridden
3641 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3642 G4 unless targeting the kernel. */
/* NOTE(review): the start of this condition is elided here; the visible
   clauses require a deployment target of macOS 10.5 or later, no
   explicit -maltivec/-mno-altivec, and no explicit -mcpu= choice.  */
3645 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3646 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3647 && ! global_options_set.x_rs6000_cpu_index)
3649 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3654 /* If not otherwise specified by a target, make 'long double' equivalent to
3657 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3658 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3661 /* Return the builtin mask of the various options used that could affect which
3662 builtins were used. In the past we used target_flags, but we've run out of
3663 bits, and some options are no longer in target_flags. */
3666 rs6000_builtin_mask_calculate (void)
/* OR together one RS6000_BTM_* bit per currently-enabled target
   feature; the result is the bitmask used to decide which built-in
   functions are available for the current target options.  */
3668 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3669 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3670 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3671 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3672 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3673 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3674 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3675 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3676 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3677 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3678 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3679 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3680 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3681 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3682 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3683 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3684 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3685 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3686 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
/* 128-bit long double builtins need the IBM extended format: hard
   float with 128-bit long double and not IEEE quad.  */
3687 | ((TARGET_LONG_DOUBLE_128
3688 && TARGET_HARD_FLOAT
3689 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3690 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3691 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3694 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3695 to clobber the XER[CA] bit because clobbering that bit without telling
3696 the compiler worked just fine with versions of GCC before GCC 5, and
3697 breaking a lot of older code in ways that are hard to track down is
3698 not such a great idea. */
3701 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3702 vec<const char *> &/*constraints*/,
3703 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* Record the carry bit XER[CA] as clobbered by every asm statement:
   append its REG rtx to the clobber vector and set its bit in the
   clobbered hard-register set (rationale in the comment above).  */
3705 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3706 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3710 /* Override command line options.
3712 Combine build-specific configuration information with options
3713 specified on the command line to set various state variables which
3714 influence code generation, optimization, and expansion of built-in
3715 functions. Assure that command-line configuration preferences are
3716 compatible with each other and with the build configuration; issue
3717 warnings while adjusting configuration or error messages while
3718 rejecting configuration.
3720 Upon entry to this function:
3722 This function is called once at the beginning of
3723 compilation, and then again at the start and end of compiling
3724 each section of code that has a different configuration, as
3725 indicated, for example, by adding the
3727 __attribute__((__target__("cpu=power9")))
3729 qualifier to a function definition or, for example, by bracketing
3732 #pragma GCC target("altivec")
3736 #pragma GCC reset_options
3738 directives. Parameter global_init_p is true for the initial
3739 invocation, which initializes global variables, and false for all
3740 subsequent invocations.
3743 Various global state information is assumed to be valid. This
3744 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3745 default CPU specified at build configure time, TARGET_DEFAULT,
3746 representing the default set of option flags for the default
3747 target, and global_options_set.x_rs6000_isa_flags, representing
3748 which options were requested on the command line.
3750 Upon return from this function:
3752 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3753 was set by name on the command line. Additionally, if certain
3754 attributes are automatically enabled or disabled by this function
3755 in order to assure compatibility between options and
3756 configuration, the flags associated with those attributes are
3757 also set. By setting these "explicit bits", we avoid the risk
3758 that other code might accidentally overwrite these particular
3759 attributes with "default values".
3761 The various bits of rs6000_isa_flags are set to indicate the
3762 target options that have been selected for the most current
3763 compilation efforts. This has the effect of also turning on the
3764 associated TARGET_XXX values since these are macros which are
3765 generally defined to test the corresponding bit of the
3766 rs6000_isa_flags variable.
3768 The variable rs6000_builtin_mask is set to represent the target
3769 options for the most current compilation efforts, consistent with
3770 the current contents of rs6000_isa_flags. This variable controls
3771 expansion of built-in functions.
3773 Various other global variables and fields of global structures
3774 (over 50 in all) are initialized to reflect the desired options
3775 for the most current compilation efforts. */
3778 rs6000_option_override_internal (bool global_init_p)
3782 HOST_WIDE_INT set_masks;
3783 HOST_WIDE_INT ignore_masks;
3786 struct cl_target_option *main_target_opt
3787 = ((global_init_p || target_option_default_node == NULL)
3788 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3790 /* Print defaults. */
3791 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3792 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3794 /* Remember the explicit arguments. */
3796 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3798 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3799 library functions, so warn about it. The flag may be useful for
3800 performance studies from time to time though, so don't disable it
3802 if (global_options_set.x_rs6000_alignment_flags
3803 && rs6000_alignment_flags == MASK_ALIGN_POWER
3804 && DEFAULT_ABI == ABI_DARWIN
3806 warning (0, "%qs is not supported for 64-bit Darwin;"
3807 " it is incompatible with the installed C and C++ libraries",
3810 /* Numerous experiments show that IRA-based loop pressure
3811 calculation works better for RTL loop invariant motion on targets
3812 with enough (>= 32) registers. It is an expensive optimization.
3813 So it is on only for peak performance. */
3814 if (optimize >= 3 && global_init_p
3815 && !global_options_set.x_flag_ira_loop_pressure)
3816 flag_ira_loop_pressure = 1;
3818 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3819 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3820 options were already specified. */
3821 if (flag_sanitize & SANITIZE_USER_ADDRESS
3822 && !global_options_set.x_flag_asynchronous_unwind_tables)
3823 flag_asynchronous_unwind_tables = 1;
3825 /* Set the pointer size. */
3828 rs6000_pmode = DImode;
3829 rs6000_pointer_size = 64;
3833 rs6000_pmode = SImode;
3834 rs6000_pointer_size = 32;
3837 /* Some OSs don't support saving the high part of 64-bit registers on context
3838 switch. Other OSs don't support saving Altivec registers. On those OSs,
3839 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3840 if the user wants either, the user must explicitly specify them and we
3841 won't interfere with the user's specification. */
3843 set_masks = POWERPC_MASKS;
3844 #ifdef OS_MISSING_POWERPC64
3845 if (OS_MISSING_POWERPC64)
3846 set_masks &= ~OPTION_MASK_POWERPC64;
3848 #ifdef OS_MISSING_ALTIVEC
3849 if (OS_MISSING_ALTIVEC)
3850 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3851 | OTHER_VSX_VECTOR_MASKS);
3854 /* Don't override by the processor default if given explicitly. */
3855 set_masks &= ~rs6000_isa_flags_explicit;
3857 if (global_init_p && rs6000_dejagnu_cpu_index >= 0)
3858 rs6000_cpu_index = rs6000_dejagnu_cpu_index;
3860 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3861 the cpu in a target attribute or pragma, but did not specify a tuning
3862 option, use the cpu for the tuning option rather than the option specified
3863 with -mtune on the command line. Process a '--with-cpu' configuration
3864 request as an implicit --cpu. */
3865 if (rs6000_cpu_index >= 0)
3866 cpu_index = rs6000_cpu_index;
3867 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3868 cpu_index = main_target_opt->x_rs6000_cpu_index;
3869 else if (OPTION_TARGET_CPU_DEFAULT)
3870 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3872 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3873 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3874 with those from the cpu, except for options that were explicitly set. If
3875 we don't have a cpu, do not override the target bits set in
3879 rs6000_cpu_index = cpu_index;
3880 rs6000_isa_flags &= ~set_masks;
3881 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3886 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3887 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3888 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3889 to using rs6000_isa_flags, we need to do the initialization here.
3891 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3892 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3893 HOST_WIDE_INT flags;
3895 flags = TARGET_DEFAULT;
3898 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3899 const char *default_cpu = (!TARGET_POWERPC64
3904 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3905 flags = processor_target_table[default_cpu_index].target_enable;
3907 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3910 if (rs6000_tune_index >= 0)
3911 tune_index = rs6000_tune_index;
3912 else if (cpu_index >= 0)
3913 rs6000_tune_index = tune_index = cpu_index;
3917 enum processor_type tune_proc
3918 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3921 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3922 if (processor_target_table[i].processor == tune_proc)
3930 rs6000_cpu = processor_target_table[cpu_index].processor;
3932 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3934 gcc_assert (tune_index >= 0);
3935 rs6000_tune = processor_target_table[tune_index].processor;
3937 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3938 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3939 || rs6000_cpu == PROCESSOR_PPCE5500)
3942 error ("AltiVec not supported in this target");
3945 /* If we are optimizing big endian systems for space, use the load/store
3946 multiple instructions. */
3947 if (BYTES_BIG_ENDIAN && optimize_size)
3948 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3950 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3951 because the hardware doesn't support the instructions used in little
3952 endian mode, and causes an alignment trap. The 750 does not cause an
3953 alignment trap (except when the target is unaligned). */
3955 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3957 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3958 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3959 warning (0, "%qs is not supported on little endian systems",
3963 /* If little-endian, default to -mstrict-align on older processors.
3964 Testing for htm matches power8 and later. */
3965 if (!BYTES_BIG_ENDIAN
3966 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3967 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3969 if (!rs6000_fold_gimple)
3971 "gimple folding of rs6000 builtins has been disabled.\n");
3973 /* Add some warnings for VSX. */
3976 const char *msg = NULL;
3977 if (!TARGET_HARD_FLOAT)
3979 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3980 msg = N_("%<-mvsx%> requires hardware floating point");
3983 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3984 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3987 else if (TARGET_AVOID_XFORM > 0)
3988 msg = N_("%<-mvsx%> needs indexed addressing");
3989 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3990 & OPTION_MASK_ALTIVEC))
3992 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3993 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3995 msg = N_("%<-mno-altivec%> disables vsx");
4001 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4002 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4006 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4007 the -mcpu setting to enable options that conflict. */
4008 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4009 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4010 | OPTION_MASK_ALTIVEC
4011 | OPTION_MASK_VSX)) != 0)
4012 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4013 | OPTION_MASK_DIRECT_MOVE)
4014 & ~rs6000_isa_flags_explicit);
4016 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4017 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4019 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4020 off all of the options that depend on those flags. */
4021 ignore_masks = rs6000_disable_incompatible_switches ();
4023 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4024 unless the user explicitly used the -mno-<option> to disable the code. */
4025 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4026 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4027 else if (TARGET_P9_MINMAX)
4031 if (cpu_index == PROCESSOR_POWER9)
4033 /* legacy behavior: allow -mcpu=power9 with certain
4034 capabilities explicitly disabled. */
4035 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4038 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4039 "for <xxx> less than power9", "-mcpu");
4041 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4042 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4043 & rs6000_isa_flags_explicit))
4044 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4045 were explicitly cleared. */
4046 error ("%qs incompatible with explicitly disabled options",
4049 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4051 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4052 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4053 else if (TARGET_VSX)
4054 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4055 else if (TARGET_POPCNTD)
4056 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4057 else if (TARGET_DFP)
4058 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4059 else if (TARGET_CMPB)
4060 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4061 else if (TARGET_FPRND)
4062 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4063 else if (TARGET_POPCNTB)
4064 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4065 else if (TARGET_ALTIVEC)
4066 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4068 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4070 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4071 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4072 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4075 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4077 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4078 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4079 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4082 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4084 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4085 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4086 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4089 if (TARGET_P8_VECTOR && !TARGET_VSX)
4091 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4092 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4093 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4094 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4096 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4097 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4098 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4102 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4104 rs6000_isa_flags |= OPTION_MASK_VSX;
4105 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4109 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4111 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4112 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4113 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4116 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4117 silently turn off quad memory mode. */
4118 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4120 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4121 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4123 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4124 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4126 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4127 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4130 /* Non-atomic quad memory load/store are disabled for little endian, since
4131 the words are reversed, but atomic operations can still be done by
4132 swapping the words. */
4133 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4135 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4136 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4139 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4142 /* Assume if the user asked for normal quad memory instructions, they want
4143   the atomic versions as well, unless they explicitly told us not to use quad
4144 word atomic instructions. */
4145 if (TARGET_QUAD_MEMORY
4146 && !TARGET_QUAD_MEMORY_ATOMIC
4147 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4148 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4150 /* If we can shrink-wrap the TOC register save separately, then use
4151 -msave-toc-indirect unless explicitly disabled. */
4152 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4153 && flag_shrink_wrap_separate
4154 && optimize_function_for_speed_p (cfun))
4155 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4157 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4158 generating power8 instructions. Power9 does not optimize power8 fusion
4160 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4162 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4163 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4165 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4168 /* Setting additional fusion flags turns on base fusion. */
4169 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4171 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4173 if (TARGET_P8_FUSION_SIGN)
4174 error ("%qs requires %qs", "-mpower8-fusion-sign",
4177 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4180 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4183 /* Power8 does not fuse sign extended loads with the addis. If we are
4184 optimizing at high levels for speed, convert a sign extended load into a
4185 zero extending load, and an explicit sign extension. */
4186 if (TARGET_P8_FUSION
4187 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4188 && optimize_function_for_speed_p (cfun)
4190 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4192 /* ISA 3.0 vector instructions include ISA 2.07. */
4193 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4195 /* We prefer to not mention undocumented options in
4196 error messages. However, if users have managed to select
4197 power9-vector without selecting power8-vector, they
4198 already know about undocumented flags. */
4199 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4200 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4201 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4202 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4204 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4205 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4206 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4210 /* OPTION_MASK_P9_VECTOR is explicit and
4211 OPTION_MASK_P8_VECTOR is not explicit. */
4212 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4213 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4217 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4218 support. If we only have ISA 2.06 support, and the user did not specify
4219 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4220 but we don't enable the full vectorization support */
4221 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4222 TARGET_ALLOW_MOVMISALIGN = 1;
4224 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4226 if (TARGET_ALLOW_MOVMISALIGN > 0
4227 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4228 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4230 TARGET_ALLOW_MOVMISALIGN = 0;
4233 /* Determine when unaligned vector accesses are permitted, and when
4234 they are preferred over masked Altivec loads. Note that if
4235 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4236 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4238 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4242 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4243 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4245 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4248 else if (!TARGET_ALLOW_MOVMISALIGN)
4250 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4251 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4252 "-mallow-movmisalign");
4254 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4258 /* Use long double size to select the appropriate long double. We use
4259 TYPE_PRECISION to differentiate the 3 different long double types. We map
4260 128 into the precision used for TFmode. */
4261 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4263 : FLOAT_PRECISION_TFmode);
4265 /* Set long double size before the IEEE 128-bit tests. */
4266 if (!global_options_set.x_rs6000_long_double_type_size)
4268 if (main_target_opt != NULL
4269 && (main_target_opt->x_rs6000_long_double_type_size
4270 != default_long_double_size))
4271 error ("target attribute or pragma changes long double size");
4273 rs6000_long_double_type_size = default_long_double_size;
4275 else if (rs6000_long_double_type_size == 128)
4276 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4277 else if (global_options_set.x_rs6000_ieeequad)
4279 if (global_options.x_rs6000_ieeequad)
4280 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4282 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4285 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4286 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4287 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4288 those systems will not pick up this default. Warn if the user changes the
4289 default unless -Wno-psabi. */
4290 if (!global_options_set.x_rs6000_ieeequad)
4291 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4295 if (global_options.x_rs6000_ieeequad
4296 && (!TARGET_POPCNTD || !TARGET_VSX))
4297 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4299 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4301 static bool warned_change_long_double;
4302 if (!warned_change_long_double)
4304 warned_change_long_double = true;
4305 if (TARGET_IEEEQUAD)
4306 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4308 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4313 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4314   systems.  In GCC 7, we would enable the IEEE 128-bit floating point
4315 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4316 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4317 the keyword as well as the type. */
4318 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4320 /* IEEE 128-bit floating point requires VSX support. */
4321 if (TARGET_FLOAT128_KEYWORD)
4325 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4326 error ("%qs requires VSX support", "%<-mfloat128%>");
4328 TARGET_FLOAT128_TYPE = 0;
4329 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4330 | OPTION_MASK_FLOAT128_HW);
4332 else if (!TARGET_FLOAT128_TYPE)
4334 TARGET_FLOAT128_TYPE = 1;
4335 warning (0, "The %<-mfloat128%> option may not be fully supported");
4339 /* Enable the __float128 keyword under Linux by default. */
4340 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4341 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4342 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4344 /* If we have are supporting the float128 type and full ISA 3.0 support,
4345 enable -mfloat128-hardware by default. However, don't enable the
4346 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4347 because sometimes the compiler wants to put things in an integer
4348 container, and if we don't have __int128 support, it is impossible. */
4349 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4350 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4351 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4352 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4354 if (TARGET_FLOAT128_HW
4355 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4357 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4358 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4360 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4363 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4365 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4366 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4368 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4371 /* Print the options after updating the defaults. */
4372 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4373 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4375 /* E500mc does "better" if we inline more aggressively. Respect the
4376 user's opinion, though. */
4377 if (rs6000_block_move_inline_limit == 0
4378 && (rs6000_tune == PROCESSOR_PPCE500MC
4379 || rs6000_tune == PROCESSOR_PPCE500MC64
4380 || rs6000_tune == PROCESSOR_PPCE5500
4381 || rs6000_tune == PROCESSOR_PPCE6500))
4382 rs6000_block_move_inline_limit = 128;
4384 /* store_one_arg depends on expand_block_move to handle at least the
4385 size of reg_parm_stack_space. */
4386 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4387 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4391 /* If the appropriate debug option is enabled, replace the target hooks
4392 with debug versions that call the real version and then prints
4393 debugging information. */
4394 if (TARGET_DEBUG_COST)
4396 targetm.rtx_costs = rs6000_debug_rtx_costs;
4397 targetm.address_cost = rs6000_debug_address_cost;
4398 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4401 if (TARGET_DEBUG_ADDR)
4403 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4404 targetm.legitimize_address = rs6000_debug_legitimize_address;
4405 rs6000_secondary_reload_class_ptr
4406 = rs6000_debug_secondary_reload_class;
4407 targetm.secondary_memory_needed
4408 = rs6000_debug_secondary_memory_needed;
4409 targetm.can_change_mode_class
4410 = rs6000_debug_can_change_mode_class;
4411 rs6000_preferred_reload_class_ptr
4412 = rs6000_debug_preferred_reload_class;
4413 rs6000_mode_dependent_address_ptr
4414 = rs6000_debug_mode_dependent_address;
4417 if (rs6000_veclibabi_name)
4419 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4420 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4423 error ("unknown vectorization library ABI type (%qs) for "
4424 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4430 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4431 target attribute or pragma which automatically enables both options,
4432 unless the altivec ABI was set. This is set by default for 64-bit, but
4434 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4436 TARGET_FLOAT128_TYPE = 0;
4437 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4438 | OPTION_MASK_FLOAT128_KEYWORD)
4439 & ~rs6000_isa_flags_explicit);
4442 /* Enable Altivec ABI for AIX -maltivec. */
4443 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4445 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4446 error ("target attribute or pragma changes AltiVec ABI");
4448 rs6000_altivec_abi = 1;
4451 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4452 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4453 be explicitly overridden in either case. */
4456 if (!global_options_set.x_rs6000_altivec_abi
4457 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4459 if (main_target_opt != NULL &&
4460 !main_target_opt->x_rs6000_altivec_abi)
4461 error ("target attribute or pragma changes AltiVec ABI");
4463 rs6000_altivec_abi = 1;
4467 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4468 So far, the only darwin64 targets are also MACH-O. */
4470 && DEFAULT_ABI == ABI_DARWIN
4473 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4474 error ("target attribute or pragma changes darwin64 ABI");
4477 rs6000_darwin64_abi = 1;
4478 /* Default to natural alignment, for better performance. */
4479 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4483 /* Place FP constants in the constant pool instead of TOC
4484 if section anchors enabled. */
4485 if (flag_section_anchors
4486 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4487 TARGET_NO_FP_IN_TOC = 1;
4489 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4490 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4492 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4493 SUBTARGET_OVERRIDE_OPTIONS;
4495 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4496 SUBSUBTARGET_OVERRIDE_OPTIONS;
4498 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4499 SUB3TARGET_OVERRIDE_OPTIONS;
4502 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4503 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4505 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4506 && rs6000_tune != PROCESSOR_POWER5
4507 && rs6000_tune != PROCESSOR_POWER6
4508 && rs6000_tune != PROCESSOR_POWER7
4509 && rs6000_tune != PROCESSOR_POWER8
4510 && rs6000_tune != PROCESSOR_POWER9
4511 && rs6000_tune != PROCESSOR_PPCA2
4512 && rs6000_tune != PROCESSOR_CELL
4513 && rs6000_tune != PROCESSOR_PPC476);
4514 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4515 || rs6000_tune == PROCESSOR_POWER5
4516 || rs6000_tune == PROCESSOR_POWER7
4517 || rs6000_tune == PROCESSOR_POWER8);
4518 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4519 || rs6000_tune == PROCESSOR_POWER5
4520 || rs6000_tune == PROCESSOR_POWER6
4521 || rs6000_tune == PROCESSOR_POWER7
4522 || rs6000_tune == PROCESSOR_POWER8
4523 || rs6000_tune == PROCESSOR_POWER9
4524 || rs6000_tune == PROCESSOR_PPCE500MC
4525 || rs6000_tune == PROCESSOR_PPCE500MC64
4526 || rs6000_tune == PROCESSOR_PPCE5500
4527 || rs6000_tune == PROCESSOR_PPCE6500);
4529 /* Allow debug switches to override the above settings. These are set to -1
4530 in rs6000.opt to indicate the user hasn't directly set the switch. */
4531 if (TARGET_ALWAYS_HINT >= 0)
4532 rs6000_always_hint = TARGET_ALWAYS_HINT;
4534 if (TARGET_SCHED_GROUPS >= 0)
4535 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4537 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4538 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4540 rs6000_sched_restricted_insns_priority
4541 = (rs6000_sched_groups ? 1 : 0);
4543 /* Handle -msched-costly-dep option. */
4544 rs6000_sched_costly_dep
4545 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4547 if (rs6000_sched_costly_dep_str)
4549 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4550 rs6000_sched_costly_dep = no_dep_costly;
4551 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4552 rs6000_sched_costly_dep = all_deps_costly;
4553 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4554 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4555 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4556 rs6000_sched_costly_dep = store_to_load_dep_costly;
4558 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4559 atoi (rs6000_sched_costly_dep_str));
4562 /* Handle -minsert-sched-nops option. */
4563 rs6000_sched_insert_nops
4564 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4566 if (rs6000_sched_insert_nops_str)
4568 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4569 rs6000_sched_insert_nops = sched_finish_none;
4570 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4571 rs6000_sched_insert_nops = sched_finish_pad_groups;
4572 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4573 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4575 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4576 atoi (rs6000_sched_insert_nops_str));
4579 /* Handle stack protector */
4580 if (!global_options_set.x_rs6000_stack_protector_guard)
4581 #ifdef TARGET_THREAD_SSP_OFFSET
4582 rs6000_stack_protector_guard = SSP_TLS;
4584 rs6000_stack_protector_guard = SSP_GLOBAL;
4587 #ifdef TARGET_THREAD_SSP_OFFSET
4588 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4589 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4592 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4595 const char *str = rs6000_stack_protector_guard_offset_str;
4598 long offset = strtol (str, &endp, 0);
4599 if (!*str || *endp || errno)
4600 error ("%qs is not a valid number in %qs", str,
4601 "-mstack-protector-guard-offset=");
4603 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4604 || (TARGET_64BIT && (offset & 3)))
4605 error ("%qs is not a valid offset in %qs", str,
4606 "-mstack-protector-guard-offset=");
4608 rs6000_stack_protector_guard_offset = offset;
4611 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4613 const char *str = rs6000_stack_protector_guard_reg_str;
4614 int reg = decode_reg_name (str);
4616 if (!IN_RANGE (reg, 1, 31))
4617 error ("%qs is not a valid base register in %qs", str,
4618 "-mstack-protector-guard-reg=");
4620 rs6000_stack_protector_guard_reg = reg;
4623 if (rs6000_stack_protector_guard == SSP_TLS
4624 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4625 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4629 #ifdef TARGET_REGNAMES
4630 /* If the user desires alternate register names, copy in the
4631 alternate names now. */
4632 if (TARGET_REGNAMES)
4633 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4636 /* Set aix_struct_return last, after the ABI is determined.
4637 If -maix-struct-return or -msvr4-struct-return was explicitly
4638 used, don't override with the ABI default. */
4639 if (!global_options_set.x_aix_struct_return)
4640 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4643 /* IBM XL compiler defaults to unsigned bitfields. */
4644 if (TARGET_XL_COMPAT)
4645 flag_signed_bitfields = 0;
4648 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4649 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4651 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4653 /* We can only guarantee the availability of DI pseudo-ops when
4654 assembling for 64-bit targets. */
4657 targetm.asm_out.aligned_op.di = NULL;
4658 targetm.asm_out.unaligned_op.di = NULL;
4662 /* Set branch target alignment, if not optimizing for size. */
4665 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4666 aligned 8byte to avoid misprediction by the branch predictor. */
4667 if (rs6000_tune == PROCESSOR_TITAN
4668 || rs6000_tune == PROCESSOR_CELL)
4670 if (flag_align_functions && !str_align_functions)
4671 str_align_functions = "8";
4672 if (flag_align_jumps && !str_align_jumps)
4673 str_align_jumps = "8";
4674 if (flag_align_loops && !str_align_loops)
4675 str_align_loops = "8";
4677 if (rs6000_align_branch_targets)
4679 if (flag_align_functions && !str_align_functions)
4680 str_align_functions = "16";
4681 if (flag_align_jumps && !str_align_jumps)
4682 str_align_jumps = "16";
4683 if (flag_align_loops && !str_align_loops)
4685 can_override_loop_align = 1;
4686 str_align_loops = "16";
4690 if (flag_align_jumps && !str_align_jumps)
4691 str_align_jumps = "16";
4692 if (flag_align_loops && !str_align_loops)
4693 str_align_loops = "16";
4696 /* Arrange to save and restore machine status around nested functions. */
4697 init_machine_status = rs6000_init_machine_status;
4699 /* We should always be splitting complex arguments, but we can't break
4700 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4701 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4702 targetm.calls.split_complex_arg = NULL;
4704 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4705 if (DEFAULT_ABI == ABI_AIX)
4706 targetm.calls.custom_function_descriptors = 0;
4709 /* Initialize rs6000_cost with the appropriate target costs. */
4711 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4713 switch (rs6000_tune)
4715 case PROCESSOR_RS64A:
4716 rs6000_cost = &rs64a_cost;
4719 case PROCESSOR_MPCCORE:
4720 rs6000_cost = &mpccore_cost;
4723 case PROCESSOR_PPC403:
4724 rs6000_cost = &ppc403_cost;
4727 case PROCESSOR_PPC405:
4728 rs6000_cost = &ppc405_cost;
4731 case PROCESSOR_PPC440:
4732 rs6000_cost = &ppc440_cost;
4735 case PROCESSOR_PPC476:
4736 rs6000_cost = &ppc476_cost;
4739 case PROCESSOR_PPC601:
4740 rs6000_cost = &ppc601_cost;
4743 case PROCESSOR_PPC603:
4744 rs6000_cost = &ppc603_cost;
4747 case PROCESSOR_PPC604:
4748 rs6000_cost = &ppc604_cost;
4751 case PROCESSOR_PPC604e:
4752 rs6000_cost = &ppc604e_cost;
4755 case PROCESSOR_PPC620:
4756 rs6000_cost = &ppc620_cost;
4759 case PROCESSOR_PPC630:
4760 rs6000_cost = &ppc630_cost;
4763 case PROCESSOR_CELL:
4764 rs6000_cost = &ppccell_cost;
4767 case PROCESSOR_PPC750:
4768 case PROCESSOR_PPC7400:
4769 rs6000_cost = &ppc750_cost;
4772 case PROCESSOR_PPC7450:
4773 rs6000_cost = &ppc7450_cost;
4776 case PROCESSOR_PPC8540:
4777 case PROCESSOR_PPC8548:
4778 rs6000_cost = &ppc8540_cost;
4781 case PROCESSOR_PPCE300C2:
4782 case PROCESSOR_PPCE300C3:
4783 rs6000_cost = &ppce300c2c3_cost;
4786 case PROCESSOR_PPCE500MC:
4787 rs6000_cost = &ppce500mc_cost;
4790 case PROCESSOR_PPCE500MC64:
4791 rs6000_cost = &ppce500mc64_cost;
4794 case PROCESSOR_PPCE5500:
4795 rs6000_cost = &ppce5500_cost;
4798 case PROCESSOR_PPCE6500:
4799 rs6000_cost = &ppce6500_cost;
4802 case PROCESSOR_TITAN:
4803 rs6000_cost = &titan_cost;
4806 case PROCESSOR_POWER4:
4807 case PROCESSOR_POWER5:
4808 rs6000_cost = &power4_cost;
4811 case PROCESSOR_POWER6:
4812 rs6000_cost = &power6_cost;
4815 case PROCESSOR_POWER7:
4816 rs6000_cost = &power7_cost;
4819 case PROCESSOR_POWER8:
4820 rs6000_cost = &power8_cost;
4823 case PROCESSOR_POWER9:
4824 rs6000_cost = &power9_cost;
4827 case PROCESSOR_PPCA2:
4828 rs6000_cost = &ppca2_cost;
4837 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4838 rs6000_cost->simultaneous_prefetches,
4839 global_options.x_param_values,
4840 global_options_set.x_param_values);
4841 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4842 global_options.x_param_values,
4843 global_options_set.x_param_values);
4844 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4845 rs6000_cost->cache_line_size,
4846 global_options.x_param_values,
4847 global_options_set.x_param_values);
4848 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4849 global_options.x_param_values,
4850 global_options_set.x_param_values);
4852 /* Increase loop peeling limits based on performance analysis. */
4853 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4854 global_options.x_param_values,
4855 global_options_set.x_param_values);
4856 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4857 global_options.x_param_values,
4858 global_options_set.x_param_values);
4860 /* Use the 'model' -fsched-pressure algorithm by default. */
4861 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4862 SCHED_PRESSURE_MODEL,
4863 global_options.x_param_values,
4864 global_options_set.x_param_values);
4866 /* If using typedef char *va_list, signal that
4867 __builtin_va_start (&ap, 0) can be optimized to
4868 ap = __builtin_next_arg (0). */
4869 if (DEFAULT_ABI != ABI_V4)
4870 targetm.expand_builtin_va_start = NULL;
4873 /* If not explicitly specified via option, decide whether to generate indexed
4874 load/store instructions. A value of -1 indicates that the
4875 initial value of this variable has not been overwritten. During
4876 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4877 if (TARGET_AVOID_XFORM == -1)
4878 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4879 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4880 need indexed accesses and the type used is the scalar type of the element
4881 being loaded or stored. */
4882 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4883 && !TARGET_ALTIVEC);
4885 /* Set the -mrecip options. */
4886 if (rs6000_recip_name)
4888 char *p = ASTRDUP (rs6000_recip_name);
4890 unsigned int mask, i;
4893 while ((q = strtok (p, ",")) != NULL)
4904 if (!strcmp (q, "default"))
4905 mask = ((TARGET_RECIP_PRECISION)
4906 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4909 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4910 if (!strcmp (q, recip_options[i].string))
4912 mask = recip_options[i].mask;
4916 if (i == ARRAY_SIZE (recip_options))
4918 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4926 rs6000_recip_control &= ~mask;
4928 rs6000_recip_control |= mask;
4932 /* Set the builtin mask of the various options used that could affect which
4933 builtins were used. In the past we used target_flags, but we've run out
4934 of bits, and some options are no longer in target_flags. */
4935 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4936 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4937 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4938 rs6000_builtin_mask);
4940 /* Initialize all of the registers. */
4941 rs6000_init_hard_regno_mode_ok (global_init_p);
4943 /* Save the initial options in case the user does function specific options */
4945 target_option_default_node = target_option_current_node
4946 = build_target_option_node (&global_options);
4948 /* If not explicitly specified via option, decide whether to generate the
4949 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4950 if (TARGET_LINK_STACK == -1)
4951 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4953 /* Deprecate use of -mno-speculate-indirect-jumps. */
4954 if (!rs6000_speculate_indirect_jumps)
4955 warning (0, "%qs is deprecated and not recommended in any circumstances",
4956 "-mno-speculate-indirect-jumps");
4961 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4962 define the target cpu type. */
4965 rs6000_option_override (void)
4967 (void) rs6000_option_override_internal (true);
4971 /* Implement targetm.vectorize.builtin_mask_for_load. */
4973 rs6000_builtin_mask_for_load (void)
4975 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4976 if ((TARGET_ALTIVEC && !TARGET_VSX)
4977 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4978 return altivec_builtin_mask_for_load;
4983 /* Implement LOOP_ALIGN. */
4985 rs6000_loop_align (rtx label)
4990 /* Don't override loop alignment if -falign-loops was specified. */
4991 if (!can_override_loop_align)
4994 bb = BLOCK_FOR_INSN (label);
4995 ninsns = num_loop_insns(bb->loop_father);
4997 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4998 if (ninsns > 4 && ninsns <= 8
4999 && (rs6000_tune == PROCESSOR_POWER4
5000 || rs6000_tune == PROCESSOR_POWER5
5001 || rs6000_tune == PROCESSOR_POWER6
5002 || rs6000_tune == PROCESSOR_POWER7
5003 || rs6000_tune == PROCESSOR_POWER8))
5004 return align_flags (5);
5009 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5010 after applying N number of iterations. This routine does not determine
5011 how may iterations are required to reach desired alignment. */
5014 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5021 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5024 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5034 /* Assuming that all other types are naturally aligned. CHECKME! */
5039 /* Return true if the vector misalignment factor is supported by the
     target.  Implements targetm.vectorize.support_vector_misalignment.  */
5042 rs6000_builtin_support_vector_misalignment (machine_mode mode,
/* Fast path: P9-class machines handle unaligned VSX accesses directly.  */
5049 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5052 /* Return if movmisalign pattern is not supported for this mode. */
5053 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
/* misalignment == -1 means the factor is unknown at compile time.  */
5056 if (misalignment == -1)
5058 /* Misalignment factor is unknown at compile time but we know
5059 it's word aligned. */
5060 if (rs6000_vector_alignment_reachable (type, is_packed))
5062 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5064 if (element_size == 64 || element_size == 32)
5071 /* VSX supports word-aligned vector. */
5072 if (misalignment % 4 == 0)
5078 /* Implement targetm.vectorize.builtin_vectorization_cost.
     Return the estimated cost (in abstract units) of a vector operation
     of kind TYPE_OF_COST on VECTYPE with the given MISALIGN.  Many of the
     per-case return values are elided in this sampled listing.  */
5080 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5081 tree vectype, int misalign)
5086 switch (type_of_cost)
5096 case cond_branch_not_taken:
5105 case vec_promote_demote:
5111 case cond_branch_taken:
5114 case unaligned_load:
5115 case vector_gather_load:
5116 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5119 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5121 elements = TYPE_VECTOR_SUBPARTS (vectype);
5123 /* Double word aligned. */
5131 /* Double word aligned. */
5135 /* Unknown misalignment. */
5148 /* Misaligned loads are not supported. */
5153 case unaligned_store:
5154 case vector_scatter_store:
5155 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5158 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5160 elements = TYPE_VECTOR_SUBPARTS (vectype);
5162 /* Double word aligned. */
5170 /* Double word aligned. */
5174 /* Unknown misalignment. */
5187 /* Misaligned stores are not supported. */
5193 /* This is a rough approximation assuming non-constant elements
5194 constructed into a vector via element insertion. FIXME:
5195 vec_construct is not granular enough for uniformly good
5196 decisions. If the initialization is a splat, this is
5197 cheaper than we estimate. Improve this someday. */
5198 elem_type = TREE_TYPE (vectype);
5199 /* 32-bit vectors loaded into registers are stored as double
5200 precision, so we need 2 permutes, 2 converts, and 1 merge
5201 to construct a vector of short floats from them. */
5202 if (SCALAR_FLOAT_TYPE_P (elem_type)
5203 && TYPE_PRECISION (elem_type) == 32)
5205 /* On POWER9, integer vector types are built up in GPRs and then
5206 use a direct move (2 cycles). For POWER8 this is even worse,
5207 as we need two direct moves and a merge, and the direct moves
5209 else if (INTEGRAL_TYPE_P (elem_type))
5211 if (TARGET_P9_VECTOR)
5212 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5214 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5217 /* V2DFmode doesn't need a direct move. */
5225 /* Implement targetm.vectorize.preferred_simd_mode.
     Return the preferred vector mode for scalar MODE; the per-mode
     returns are elided in this sampled listing.  */
5228 rs6000_preferred_simd_mode (scalar_mode mode)
5237 if (TARGET_ALTIVEC || TARGET_VSX)
/* Per-loop cost-model state passed through the vectorizer cost hooks.
   The cost array member (indexed by vect_cost_model_location) is elided
   in this sampled listing.  */
5257 typedef struct _rs6000_cost_data
5259 struct loop *loop_info;
5263 /* Test for likely overcommitment of vector hardware resources. If a
5264 loop iteration is relatively large, and too large a percentage of
5265 instructions in the loop are vectorized, the cost model may not
5266 adequately reflect delays from unavailable vector resources.
5267 Penalize the loop body cost for this case. */
5270 rs6000_density_test (rs6000_cost_data *data)
5272 const int DENSITY_PCT_THRESHOLD = 85;
5273 const int DENSITY_SIZE_THRESHOLD = 70;
5274 const int DENSITY_PENALTY = 10;
5275 struct loop *loop = data->loop_info;
5276 basic_block *bbs = get_loop_body (loop);
5277 int nbbs = loop->num_nodes;
5278 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5279 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
/* Walk every statement in the loop body, counting the cost of statements
   that are NOT part of the vectorization (accumulation into not_vec_cost
   is elided in this sampled listing).  */
5282 for (i = 0; i < nbbs; i++)
5284 basic_block bb = bbs[i];
5285 gimple_stmt_iterator gsi;
5287 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5289 gimple *stmt = gsi_stmt (gsi);
5290 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5292 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5293 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5299 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
/* Apply the penalty only when both the vector density and the absolute
   loop size exceed their thresholds.  */
5301 if (density_pct > DENSITY_PCT_THRESHOLD
5302 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5304 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_NOTE, vect_location,
5307 "density %d%%, cost %d exceeds threshold, penalizing "
5308 "loop body cost by %d%%", density_pct,
5309 vec_cost + not_vec_cost, DENSITY_PENALTY);
5313 /* Implement targetm.vectorize.init_cost.  */
5315 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5316 instruction is needed by the vectorization. */
5317 static bool rs6000_vect_nonmem;
/* Allocate and zero-initialize the per-loop cost data (returned to the
   vectorizer; the return statement is elided in this sampled listing).  */
5320 rs6000_init_cost (struct loop *loop_info)
5322 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5323 data->loop_info = loop_info;
5324 data->cost[vect_prologue] = 0;
5325 data->cost[vect_body] = 0;
5326 data->cost[vect_epilogue] = 0;
5327 rs6000_vect_nonmem = false;
5331 /* Implement targetm.vectorize.add_stmt_cost.  Record the cost of COUNT
     statements of kind KIND at location WHERE into the accumulated cost
     DATA, and return the amount added.  */
5334 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5335 struct _stmt_vec_info *stmt_info, int misalign,
5336 enum vect_cost_model_location where)
5338 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5339 unsigned retval = 0;
5341 if (flag_vect_cost_model)
5343 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5344 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5346 /* Statements in an inner loop relative to the loop being
5347 vectorized are weighted more heavily. The value here is
5348 arbitrary and could potentially be improved with analysis. */
5349 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5350 count *= 50; /* FIXME. */
5352 retval = (unsigned) (count * stmt_cost);
5353 cost_data->cost[where] += retval;
5355 /* Check whether we're doing something other than just a copy loop.
5356 Not all such loops may be profitably vectorized; see
5357 rs6000_finish_cost. */
5358 if ((kind == vec_to_scalar || kind == vec_perm
5359 || kind == vec_promote_demote || kind == vec_construct
5360 || kind == scalar_to_vec)
5361 || (where == vect_body && kind == vector_stmt))
5362 rs6000_vect_nonmem = true;
5368 /* Implement targetm.vectorize.finish_cost.  Apply final adjustments to
     the accumulated costs in DATA and write the three totals into the
     output parameters.  */
5371 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5372 unsigned *body_cost, unsigned *epilogue_cost)
5374 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
/* loop_info is NULL for basic-block (SLP) vectorization; the density
   penalty only applies to loops.  */
5376 if (cost_data->loop_info)
5377 rs6000_density_test (cost_data);
5379 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5380 that require versioning for any reason. The vectorization is at
5381 best a wash inside the loop, and the versioning checks make
5382 profitability highly unlikely and potentially quite harmful. */
5383 if (cost_data->loop_info)
5385 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5386 if (!rs6000_vect_nonmem
5387 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5388 && LOOP_REQUIRES_VERSIONING (vec_info))
/* A huge body cost effectively vetoes vectorization of this loop.  */
5389 cost_data->cost[vect_body] += 10000;
5392 *prologue_cost = cost_data->cost[vect_prologue];
5393 *body_cost = cost_data->cost[vect_body];
5394 *epilogue_cost = cost_data->cost[vect_epilogue];
5397 /* Implement targetm.vectorize.destroy_cost_data.  Free the cost data
     allocated by rs6000_init_cost (the body is elided in this sampled
     listing).  */
5400 rs6000_destroy_cost_data (void *data)
5405 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5406 library with vectorized intrinsics.  Given scalar builtin FN and the
     desired vector TYPE_OUT/TYPE_IN, build and return a decl for the
     corresponding MASS library function (e.g. pow -> powd2).  */
5409 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5413 const char *suffix = NULL;
5414 tree fntype, new_fndecl, bdecl = NULL_TREE;
5417 machine_mode el_mode, in_mode;
5420 /* Libmass is suitable for unsafe math only as it does not correctly support
5421 parts of IEEE with the required precision such as denormals. Only support
5422 it if we have VSX to use the simd d2 or f4 functions.
5423 XXX: Add variable length support. */
5424 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5427 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5428 n = TYPE_VECTOR_SUBPARTS (type_out);
5429 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5430 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5431 if (el_mode != in_mode
/* Select the scalar builtin and the MASS name suffix from the element
   mode: double-precision pairs get "d2", single-precision quads "4".  */
5467 if (el_mode == DFmode && n == 2)
5469 bdecl = mathfn_built_in (double_type_node, fn);
5470 suffix = "d2"; /* pow -> powd2 */
5472 else if (el_mode == SFmode && n == 4)
5474 bdecl = mathfn_built_in (float_type_node, fn);
5475 suffix = "4"; /* powf -> powf4 */
5487 gcc_assert (suffix != NULL);
5488 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
/* Strip the "__builtin_" prefix from the builtin's name, then append
   the MASS suffix.  */
5492 strcpy (name, bname + sizeof ("__builtin_") - 1);
5493 strcat (name, suffix);
5496 fntype = build_function_type_list (type_out, type_in, NULL);
5497 else if (n_args == 2)
5498 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5502 /* Build a function declaration for the vectorized function. */
5503 new_fndecl = build_decl (BUILTINS_LOCATION,
5504 FUNCTION_DECL, get_identifier (name), fntype);
5505 TREE_PUBLIC (new_fndecl) = 1;
5506 DECL_EXTERNAL (new_fndecl) = 1;
5507 DECL_IS_NOVOPS (new_fndecl) = 1;
5508 TREE_READONLY (new_fndecl) = 1;
5513 /* Returns a function decl for a vectorized version of the builtin function
5514 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5515 if it is not available.
     NOTE(review): the switch-on-FN case labels (CFN_BUILT_IN_COPYSIGN,
     _CEIL, _FLOOR, _FMA, _TRUNC, _NEARBYINT, _RINT by the returned decls)
     are elided in this sampled listing; each group below maps one scalar
     builtin to its V2DF/V4SF vector equivalent.  */
5518 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5521 machine_mode in_mode, out_mode;
5524 if (TARGET_DEBUG_BUILTIN)
5525 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5526 combined_fn_name (combined_fn (fn)),
5527 GET_MODE_NAME (TYPE_MODE (type_out)),
5528 GET_MODE_NAME (TYPE_MODE (type_in)));
5530 if (TREE_CODE (type_out) != VECTOR_TYPE
5531 || TREE_CODE (type_in) != VECTOR_TYPE)
5534 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5535 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5536 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5537 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* copysign group.  */
5542 if (VECTOR_UNIT_VSX_P (V2DFmode)
5543 && out_mode == DFmode && out_n == 2
5544 && in_mode == DFmode && in_n == 2)
5545 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5546 if (VECTOR_UNIT_VSX_P (V4SFmode)
5547 && out_mode == SFmode && out_n == 4
5548 && in_mode == SFmode && in_n == 4)
5549 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5550 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5551 && out_mode == SFmode && out_n == 4
5552 && in_mode == SFmode && in_n == 4)
5553 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
/* Round-toward-plus-infinity (ceil) group: xvrdpip/xvrspip/vrfip.  */
5556 if (VECTOR_UNIT_VSX_P (V2DFmode)
5557 && out_mode == DFmode && out_n == 2
5558 && in_mode == DFmode && in_n == 2)
5559 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5560 if (VECTOR_UNIT_VSX_P (V4SFmode)
5561 && out_mode == SFmode && out_n == 4
5562 && in_mode == SFmode && in_n == 4)
5563 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5564 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5565 && out_mode == SFmode && out_n == 4
5566 && in_mode == SFmode && in_n == 4)
5567 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
/* Round-toward-minus-infinity (floor) group: xvrdpim/xvrspim/vrfim.  */
5570 if (VECTOR_UNIT_VSX_P (V2DFmode)
5571 && out_mode == DFmode && out_n == 2
5572 && in_mode == DFmode && in_n == 2)
5573 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5574 if (VECTOR_UNIT_VSX_P (V4SFmode)
5575 && out_mode == SFmode && out_n == 4
5576 && in_mode == SFmode && in_n == 4)
5577 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5578 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5579 && out_mode == SFmode && out_n == 4
5580 && in_mode == SFmode && in_n == 4)
5581 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
/* Fused multiply-add group: xvmadddp/xvmaddsp/vmaddfp.  */
5584 if (VECTOR_UNIT_VSX_P (V2DFmode)
5585 && out_mode == DFmode && out_n == 2
5586 && in_mode == DFmode && in_n == 2)
5587 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5588 if (VECTOR_UNIT_VSX_P (V4SFmode)
5589 && out_mode == SFmode && out_n == 4
5590 && in_mode == SFmode && in_n == 4)
5591 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5592 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5593 && out_mode == SFmode && out_n == 4
5594 && in_mode == SFmode && in_n == 4)
5595 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
/* Round-toward-zero (trunc) group: xvrdpiz/xvrspiz/vrfiz.  */
5598 if (VECTOR_UNIT_VSX_P (V2DFmode)
5599 && out_mode == DFmode && out_n == 2
5600 && in_mode == DFmode && in_n == 2)
5601 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5602 if (VECTOR_UNIT_VSX_P (V4SFmode)
5603 && out_mode == SFmode && out_n == 4
5604 && in_mode == SFmode && in_n == 4)
5605 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5606 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5607 && out_mode == SFmode && out_n == 4
5608 && in_mode == SFmode && in_n == 4)
5609 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
/* Round-to-nearest group (xvrdpi/xvrspi); only valid when unsafe math
   is on, since the insn rounding mode differs from the C semantics.  */
5612 if (VECTOR_UNIT_VSX_P (V2DFmode)
5613 && flag_unsafe_math_optimizations
5614 && out_mode == DFmode && out_n == 2
5615 && in_mode == DFmode && in_n == 2)
5616 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5617 if (VECTOR_UNIT_VSX_P (V4SFmode)
5618 && flag_unsafe_math_optimizations
5619 && out_mode == SFmode && out_n == 4
5620 && in_mode == SFmode && in_n == 4)
5621 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
/* Round-using-current-mode group (xvrdpic/xvrspic); requires
   -fno-trapping-math because the insns may not raise inexact.  */
5624 if (VECTOR_UNIT_VSX_P (V2DFmode)
5625 && !flag_trapping_math
5626 && out_mode == DFmode && out_n == 2
5627 && in_mode == DFmode && in_n == 2)
5628 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5629 if (VECTOR_UNIT_VSX_P (V4SFmode)
5630 && !flag_trapping_math
5631 && out_mode == SFmode && out_n == 4
5632 && in_mode == SFmode && in_n == 4)
5633 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5639 /* Generate calls to libmass if appropriate. */
5640 if (rs6000_veclib_handler)
5641 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5646 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  Map a
     machine-dependent scalar builtin FNDECL to its vector counterpart
     for the given TYPE_OUT/TYPE_IN, or return NULL_TREE (the fall-through
     returns are elided in this sampled listing).  */
5649 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5652 machine_mode in_mode, out_mode;
5655 if (TARGET_DEBUG_BUILTIN)
5656 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5657 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5658 GET_MODE_NAME (TYPE_MODE (type_out)),
5659 GET_MODE_NAME (TYPE_MODE (type_in)));
5661 if (TREE_CODE (type_out) != VECTOR_TYPE
5662 || TREE_CODE (type_in) != VECTOR_TYPE)
5665 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5666 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5667 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5668 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5670 enum rs6000_builtins fn
5671 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
/* Reciprocal square-root and reciprocal estimates, single and double.  */
5674 case RS6000_BUILTIN_RSQRTF:
5675 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5676 && out_mode == SFmode && out_n == 4
5677 && in_mode == SFmode && in_n == 4)
5678 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5680 case RS6000_BUILTIN_RSQRT:
5681 if (VECTOR_UNIT_VSX_P (V2DFmode)
5682 && out_mode == DFmode && out_n == 2
5683 && in_mode == DFmode && in_n == 2)
5684 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5686 case RS6000_BUILTIN_RECIPF:
5687 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5688 && out_mode == SFmode && out_n == 4
5689 && in_mode == SFmode && in_n == 4)
5690 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5692 case RS6000_BUILTIN_RECIP:
5693 if (VECTOR_UNIT_VSX_P (V2DFmode)
5694 && out_mode == DFmode && out_n == 2
5695 && in_mode == DFmode && in_n == 2)
5696 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5704 /* Default CPU string for rs6000*_file_start functions. */
5705 static const char *rs6000_default_cpu;
5707 /* Do anything needed at the start of the asm file.  Emits a verbose-asm
     comment describing the active CPU/tune/sdata options, a ".machine"
     pseudo-op on ELF targets, and the ELFv2 ".abiversion" directive.  */
5710 rs6000_file_start (void)
5713 const char *start = buffer;
5714 FILE *file = asm_out_file;
5716 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5718 default_file_start ();
5720 if (flag_verbose_asm)
5722 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5724 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5726 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5730 if (global_options_set.x_rs6000_cpu_index)
5732 fprintf (file, "%s -mcpu=%s", start,
5733 processor_target_table[rs6000_cpu_index].name);
5737 if (global_options_set.x_rs6000_tune_index)
5739 fprintf (file, "%s -mtune=%s", start,
5740 processor_target_table[rs6000_tune_index].name);
5744 if (PPC405_ERRATUM77)
5746 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5750 #ifdef USING_ELFOS_H
5751 switch (rs6000_sdata)
5753 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5754 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5755 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5756 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5759 if (rs6000_sdata && g_switch_value)
5761 fprintf (file, "%s -G %d", start,
5771 #ifdef USING_ELFOS_H
/* If no explicit -mcpu was given, pick the .machine directive from the
   highest ISA feature enabled in rs6000_isa_flags — each OPTION_MASK_*
   below is a feature first introduced by the corresponding CPU.  */
5772 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5773 && !global_options_set.x_rs6000_cpu_index)
5775 fputs ("\t.machine ", asm_out_file);
5776 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5777 fputs ("power9\n", asm_out_file);
5778 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5779 fputs ("power8\n", asm_out_file);
5780 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5781 fputs ("power7\n", asm_out_file);
5782 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5783 fputs ("power6\n", asm_out_file);
5784 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5785 fputs ("power5\n", asm_out_file);
5786 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5787 fputs ("power4\n", asm_out_file);
5788 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5789 fputs ("ppc64\n", asm_out_file);
5791 fputs ("ppc\n", asm_out_file);
5795 if (DEFAULT_ABI == ABI_ELFv2)
5796 fprintf (file, "\t.abiversion 2\n");
5800 /* Return nonzero if this function is known to have a null epilogue.
     Only meaningful after reload, when the stack frame layout is final:
     true when nothing needs to be restored (no saved GPRs, FPRs, Altivec
     regs, LR, CR, or VRSAVE; remaining conjuncts are elided in this
     sampled listing).  */
5803 direct_return (void)
5805 if (reload_completed)
5807 rs6000_stack_t *info = rs6000_stack_info ();
5809 if (info->first_gp_reg_save == 32
5810 && info->first_fp_reg_save == 64
5811 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5812 && ! info->lr_save_p
5813 && ! info->cr_save_p
5814 && info->vrsave_size == 0
5822 /* Helper for num_insns_constant. Calculate number of instructions to
5823 load VALUE to a single gpr using combinations of addi, addis, ori,
5824 oris and sldi instructions. */
5827 num_insns_constant_gpr (HOST_WIDE_INT value)
5829 /* signed constant loadable with addi */
5830 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5833 /* constant loadable with addis */
5834 else if ((value & 0xffff) == 0
5835 && (value >> 31 == -1 || value >> 31 == 0))
5838 else if (TARGET_POWERPC64)
/* Split VALUE at bit 32: LOW is the sign-extended low word, HIGH the
   remaining high bits; recurse on each half.  */
5840 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5841 HOST_WIDE_INT high = value >> 31;
5843 if (high == 0 || high == -1)
5849 return num_insns_constant_gpr (high) + 1;
5851 return num_insns_constant_gpr (low) + 1;
5853 return (num_insns_constant_gpr (high)
5854 + num_insns_constant_gpr (low) + 1);
5861 /* Helper for num_insns_constant. Allow constants formed by the
5862 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5863 and handle modes that require multiple gprs. */
5866 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5868 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Cost one word at a time; the enclosing loop and accumulation are
   elided in this sampled listing.  */
5872 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5873 int insns = num_insns_constant_gpr (low);
5875 /* We won't get more than 2 from num_insns_constant_gpr
5876 except when TARGET_POWERPC64 and mode is DImode or
5877 wider, so the register mode must be DImode. */
5878 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5881 value >>= BITS_PER_WORD;
5886 /* Return the number of instructions it takes to form a constant in as
5887 many gprs are needed for MODE. */
5890 num_insns_constant (rtx op, machine_mode mode)
5894 switch (GET_CODE (op))
5900 case CONST_WIDE_INT:
5903 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5904 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5911 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
/* Single-precision (binary or decimal) fits in one 32-bit image.  */
5913 if (mode == SFmode || mode == SDmode)
5918 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5920 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5921 /* See the first define_split in rs6000.md handling a
5922 const_double_operand. */
5926 else if (mode == DFmode || mode == DDmode)
5931 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5933 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5935 /* See the second (32-bit) and third (64-bit) define_split
5936 in rs6000.md handling a const_double_operand. */
/* Assemble the two 32-bit target words into one 64-bit VAL, honoring
   target endianness.  */
5937 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5938 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5941 else if (mode == TFmode || mode == TDmode
5942 || mode == KFmode || mode == IFmode)
5948 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5950 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
/* 128-bit value: cost each 64-bit half separately and sum.  */
5952 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5953 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5954 insns = num_insns_constant_multi (val, DImode);
5955 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5956 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5957 insns += num_insns_constant_multi (val, DImode);
5969 return num_insns_constant_multi (val, mode);
5972 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5973 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5974 corresponding element of the vector, but for V4SFmode, the
5975 corresponding "float" is interpreted as an SImode integer. */
5978 const_vector_elt_as_int (rtx op, unsigned int elt)
5982 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5983 gcc_assert (GET_MODE (op) != V2DImode
5984 && GET_MODE (op) != V2DFmode);
5986 tmp = CONST_VECTOR_ELT (op, elt);
/* Reinterpret the float's bit pattern as an SImode integer.  */
5987 if (GET_MODE (op) == V4SFmode)
5988 tmp = gen_lowpart (SImode, tmp);
5989 return INTVAL (tmp);
5992 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5993 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5994 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5995 all items are set to the same value and contain COPIES replicas of the
5996 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5997 operand and the others are set to the value of the operand's msb. */
6000 vspltis_constant (rtx op, unsigned step, unsigned copies)
6002 machine_mode mode = GET_MODE (op);
6003 machine_mode inner = GET_MODE_INNER (mode);
6011 HOST_WIDE_INT splat_val;
6012 HOST_WIDE_INT msb_val;
/* Modes with 64-bit or wider elements can't be handled by vspltis*.  */
6014 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6017 nunits = GET_MODE_NUNITS (mode);
6018 bitsize = GET_MODE_BITSIZE (inner);
6019 mask = GET_MODE_MASK (inner);
/* Start from the element holding the splat candidate (last element on
   big-endian layouts).  */
6021 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6023 msb_val = val >= 0 ? 0 : -1;
6025 /* Construct the value to be splatted, if possible. If not, return 0. */
6026 for (i = 2; i <= copies; i *= 2)
6028 HOST_WIDE_INT small_val;
6030 small_val = splat_val >> bitsize;
/* Each halving step must find the same value replicated in both
   halves; otherwise OP is not a COPIES-fold replication.  */
6032 if (splat_val != ((HOST_WIDE_INT)
6033 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6034 | (small_val & mask)))
6036 splat_val = small_val;
6039 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6040 if (EASY_VECTOR_15 (splat_val))
6043 /* Also check if we can splat, and then add the result to itself. Do so if
6044 the value is positive, or if the splat instruction is using OP's mode;
6045 for splat_val < 0, the splat and the add should use the same mode. */
6046 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6047 && (splat_val >= 0 || (step == 1 && copies == 1)))
6050 /* Also check if we are loading up the most significant bit which can be done
6051 by loading up -1 and shifting the value left by -1. */
6052 else if (EASY_VECTOR_MSB (splat_val, inner))
6058 /* Check if VAL is present in every STEP-th element, and the
6059 other elements are filled with its most significant bit. */
6060 for (i = 1; i < nunits; ++i)
6062 HOST_WIDE_INT desired_val;
6063 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6064 if ((i & (step - 1)) == 0)
6067 desired_val = msb_val;
6069 if (desired_val != const_vector_elt_as_int (op, elt))
6076 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6077 instruction, filling in the bottom elements with 0 or -1.
6079 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6080 for the number of zeroes to shift in, or negative for the number of 0xff
6083 OP is a CONST_VECTOR. */
6086 vspltis_shifted (rtx op)
6088 machine_mode mode = GET_MODE (op);
6089 machine_mode inner = GET_MODE_INNER (mode);
/* Only byte/halfword/word element vectors are handled.  */
6097 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6100 /* We need to create pseudo registers to do the shift, so don't recognize
6101 shift vector constants after reload. */
6102 if (!can_create_pseudo_p ())
6105 nunits = GET_MODE_NUNITS (mode);
6106 mask = GET_MODE_MASK (inner);
/* Splat candidate comes from the first element in memory order.  */
6108 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6110 /* Check if the value can really be the operand of a vspltis[bhw]. */
6111 if (EASY_VECTOR_15 (val))
6114 /* Also check if we are loading up the most significant bit which can be done
6115 by loading up -1 and shifting the value left by -1. */
6116 else if (EASY_VECTOR_MSB (val, inner))
6122 /* Check if VAL is present in every STEP-th element until we find elements
6123 that are 0 or all 1 bits. */
6124 for (i = 1; i < nunits; ++i)
6126 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6127 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6129 /* If the value isn't the splat value, check for the remaining elements
/* All trailing elements must be zero: report a positive byte count.  */
6135 for (j = i+1; j < nunits; ++j)
6137 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6138 if (const_vector_elt_as_int (op, elt2) != 0)
6142 return (nunits - i) * GET_MODE_SIZE (inner);
/* All trailing elements must be all-ones: report a negative count.  */
6145 else if ((elt_val & mask) == mask)
6147 for (j = i+1; j < nunits; ++j)
6149 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6150 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6154 return -((nunits - i) * GET_MODE_SIZE (inner));
6162 /* If all elements are equal, we don't need to do VSLDOI. */
6167 /* Return true if OP is of the given MODE and can be synthesized
6168 with a vspltisb, vspltish or vspltisw. */
6171 easy_altivec_constant (rtx op, machine_mode mode)
6173 unsigned step, copies;
6175 if (mode == VOIDmode)
6176 mode = GET_MODE (op);
6177 else if (mode != GET_MODE (op))
6180 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6182 if (mode == V2DFmode)
6183 return zero_constant (op, mode);
6185 else if (mode == V2DImode)
6187 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6188 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6191 if (zero_constant (op, mode))
6194 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6195 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6201 /* V1TImode is a special container for TImode. Ignore for now. */
6202 else if (mode == V1TImode)
6205 /* Start with a vspltisw. */
/* Try word-granularity splat first: STEP selects every 4-byte element.  */
6206 step = GET_MODE_NUNITS (mode) / 4;
6209 if (vspltis_constant (op, step, copies))
6212 /* Then try with a vspltish. */
6218 if (vspltis_constant (op, step, copies))
6221 /* And finally a vspltisb. */
6227 if (vspltis_constant (op, step, copies))
/* Last resort: a splat followed by a VSLDOI shift.  */
6230 if (vspltis_shifted (op) != 0)
6236 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6237 result is OP. Abort if it is not possible. */
6240 gen_easy_altivec_constant (rtx op)
6242 machine_mode mode = GET_MODE (op);
6243 int nunits = GET_MODE_NUNITS (mode);
/* The splat value lives in the last element on big-endian layouts.  */
6244 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6245 unsigned step = nunits / 4;
6246 unsigned copies = 1;
6248 /* Start with a vspltisw. */
6249 if (vspltis_constant (op, step, copies))
6250 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6252 /* Then try with a vspltish. */
6258 if (vspltis_constant (op, step, copies))
6259 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6261 /* And finally a vspltisb. */
6267 if (vspltis_constant (op, step, copies))
6268 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6273 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6274 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6276 Return the number of instructions needed (1 or 2) into the address pointed
6279 Return the constant that is being split via CONSTANT_PTR. */
6282 xxspltib_constant_p (rtx op,
6287 size_t nunits = GET_MODE_NUNITS (mode);
6289 HOST_WIDE_INT value;
6292 /* Set the returned values to out of bound values. */
6293 *num_insns_ptr = -1;
6294 *constant_ptr = 256;
6296 if (!TARGET_P9_VECTOR)
6299 if (mode == VOIDmode)
6300 mode = GET_MODE (op);
6302 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6305 /* Handle (vec_duplicate <constant>). */
6306 if (GET_CODE (op) == VEC_DUPLICATE)
6308 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6309 && mode != V2DImode)
6312 element = XEXP (op, 0);
6313 if (!CONST_INT_P (element))
6316 value = INTVAL (element);
/* xxspltib's immediate is a signed byte.  */
6317 if (!IN_RANGE (value, -128, 127))
6321 /* Handle (const_vector [...]). */
6322 else if (GET_CODE (op) == CONST_VECTOR)
6324 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6325 && mode != V2DImode)
6328 element = CONST_VECTOR_ELT (op, 0)
6329 if (!CONST_INT_P (element))
6332 value = INTVAL (element);
6333 if (!IN_RANGE (value, -128, 127))
/* All elements must equal the first for a splat.  */
6336 for (i = 1; i < nunits; i++)
6338 element = CONST_VECTOR_ELT (op, i);
6339 if (!CONST_INT_P (element))
6342 if (value != INTVAL (element))
6347 /* Handle integer constants being loaded into the upper part of the VSX
6348 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6349 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6350 else if (CONST_INT_P (op))
6352 if (!SCALAR_INT_MODE_P (mode))
6355 value = INTVAL (op);
6356 if (!IN_RANGE (value, -128, 127))
6359 if (!IN_RANGE (value, -1, 0))
6361 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6364 if (EASY_VECTOR_15 (value))
6372 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6373 sign extend. Special case 0/-1 to allow getting any VSX register instead
6374 of an Altivec register. */
6375 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6376 && EASY_VECTOR_15 (value))
6379 /* Return # of instructions and the constant byte for XXSPLTIB. */
6380 if (mode == V16QImode)
6383 else if (IN_RANGE (value, -1, 0))
6389 *constant_ptr = (int) value;
/* Return the assembler template for moving a vector constant OPERANDS[1]
   into register OPERANDS[0], choosing among xxspltib, vspltis*, xxlxor,
   and xxlorc depending on the constant and the target ISA.  */
6394 output_vec_const_move (rtx *operands)
6402 mode = GET_MODE (dest);
6406 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6407 int xxspltib_value = 256;
/* All-zero constant: prefer xxspltib (P9), else vspltisw for an Altivec
   destination, else xxlxor which works on any VSX register.  */
6410 if (zero_constant (vec, mode))
6412 if (TARGET_P9_VECTOR)
6413 return "xxspltib %x0,0";
6415 else if (dest_vmx_p)
6416 return "vspltisw %0,0";
6419 return "xxlxor %x0,%x0,%x0";
/* All-ones constant: analogous preference order.  */
6422 if (all_ones_constant (vec, mode))
6424 if (TARGET_P9_VECTOR)
6425 return "xxspltib %x0,255";
6427 else if (dest_vmx_p)
6428 return "vspltisw %0,-1";
6430 else if (TARGET_P8_VECTOR)
6431 return "xxlorc %x0,%x0,%x0";
6437 if (TARGET_P9_VECTOR
6438 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6442 operands[2] = GEN_INT (xxspltib_value & 0xff);
6443 return "xxspltib %x0,%2";
/* From here on the destination must be an Altivec register, since the
   vsplt* and vsldoi forms only operate on VMX registers.  */
6454 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6455 if (zero_constant (vec, mode))
6456 return "vspltisw %0,0";
6458 if (all_ones_constant (vec, mode))
6459 return "vspltisw %0,-1";
6461 /* Do we need to construct a value using VSLDOI? */
6462 shift = vspltis_shifted (vec);
6466 splat_vec = gen_easy_altivec_constant (vec);
6467 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6468 operands[1] = XEXP (splat_vec, 0);
6469 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6472 switch (GET_MODE (splat_vec))
6475 return "vspltisw %0,%1";
6478 return "vspltish %0,%1";
6481 return "vspltisb %0,%1";
6491 /* Initialize vector TARGET to VALS. */
/* NOTE(review): this extract is missing interim source lines; the comments
   below document only the visible fragment.  The function picks the cheapest
   expansion for a vector constructor: constant splat/load, VSX double-word
   concat/splat, direct-move integer paths, V4SF conversion tricks, a
   stack-temp splat, or a per-element memory build as the fallback.  */
6494 rs6000_expand_vector_init (rtx target, rtx vals)
6496   machine_mode mode = GET_MODE (target);
6497   machine_mode inner_mode = GET_MODE_INNER (mode);
6498   int n_elts = GET_MODE_NUNITS (mode);
6499   int n_var = 0, one_var = -1;
6500   bool all_same = true, all_const_zero = true;
/* Scan the elements: count non-constants (N_VAR, remembering the last one in
   ONE_VAR), and detect the all-zero and all-identical cases.  */
6504   for (i = 0; i < n_elts; ++i)
6506       x = XVECEXP (vals, 0, i);
6507       if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6508 	++n_var, one_var = i;
6509       else if (x != CONST0_RTX (inner_mode))
6510 	all_const_zero = false;
6512       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
/* All-constant vector: emit a zero move, a splat-immediate SET, or fall back
   to a constant-pool load.  */
6518       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6519       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6520       if ((int_vector_p || TARGET_VSX) && all_const_zero)
6522 	  /* Zero register.  */
6523 	  emit_move_insn (target, CONST0_RTX (mode));
6526       else if (int_vector_p && easy_vector_constant (const_vec, mode))
6528 	  /* Splat immediate.  */
6529 	  emit_insn (gen_rtx_SET (target, const_vec));
6534 	  /* Load from constant pool.  */
6535 	  emit_move_insn (target, const_vec);
6540   /* Double word values on VSX can use xxpermdi or lxvdsx.  */
6541   if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
/* Only one operand is needed when both elements are the same.  */
6545       size_t num_elements = all_same ? 1 : 2;
6546       for (i = 0; i < num_elements; i++)
6548 	  op[i] = XVECEXP (vals, 0, i);
6549 	  /* Just in case there is a SUBREG with a smaller mode, do a
6551 	  if (GET_MODE (op[i]) != inner_mode)
6553 	      rtx tmp = gen_reg_rtx (inner_mode);
6554 	      convert_move (tmp, op[i], 0);
6557 	  /* Allow load with splat double word.  */
6558 	  else if (MEM_P (op[i]))
6561 		op[i] = force_reg (inner_mode, op[i]);
6563 	  else if (!REG_P (op[i]))
6564 	    op[i] = force_reg (inner_mode, op[i]);
6569 	  if (mode == V2DFmode)
6570 	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
6572 	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
6576 	  if (mode == V2DFmode)
6577 	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6579 	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6584   /* Special case initializing vector int if we are on 64-bit systems with
6585      direct move or we have the ISA 3.0 instructions.  */
6586   if (mode == V4SImode  && VECTOR_MEM_VSX_P (V4SImode)
6587       && TARGET_DIRECT_MOVE_64BIT)
6591 	  rtx element0 = XVECEXP (vals, 0, 0);
6592 	  if (MEM_P (element0))
6593 	    element0 = rs6000_force_indexed_or_indirect_mem (element0);
6595 	    element0 = force_reg (SImode, element0);
6597 	  if (TARGET_P9_VECTOR)
6598 	    emit_insn (gen_vsx_splat_v4si (target, element0));
/* Pre-ISA-3.0 path: zero-extend the word into a DImode register and splat
   through the direct-move pattern.  */
6601 	      rtx tmp = gen_reg_rtx (DImode);
6602 	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
6603 	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6612 	  for (i = 0; i < 4; i++)
6613 	    elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6615 	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6616 					elements[2], elements[3]));
6621   /* With single precision floating point on VSX, know that internally single
6622      precision is actually represented as a double, and either make 2 V2DF
6623      vectors, and convert these vectors to single precision, or do one
6624      conversion, and splat the result to the other elements.  */
6625   if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6629 	  rtx element0 = XVECEXP (vals, 0, 0);
6631 	  if (TARGET_P9_VECTOR)
6633 	      if (MEM_P (element0))
6634 		element0 = rs6000_force_indexed_or_indirect_mem (element0);
6636 	      emit_insn (gen_vsx_splat_v4sf (target, element0));
/* Pre-ISA-3.0: convert the scalar once, then splat word 0 to all lanes.  */
6641 	      rtx freg = gen_reg_rtx (V4SFmode);
6642 	      rtx sreg = force_reg (SFmode, element0);
6643 	      rtx cvt  = (TARGET_XSCVDPSPN
6644 			  ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6645 			  : gen_vsx_xscvdpsp_scalar (freg, sreg));
6648 	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
/* General V4SF case: build two V2DF vectors, convert to single precision,
   and interleave/extract the lanes into TARGET.  */
6654 	  rtx dbl_even = gen_reg_rtx (V2DFmode);
6655 	  rtx dbl_odd  = gen_reg_rtx (V2DFmode);
6656 	  rtx flt_even = gen_reg_rtx (V4SFmode);
6657 	  rtx flt_odd  = gen_reg_rtx (V4SFmode);
6658 	  rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6659 	  rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6660 	  rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6661 	  rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6663 	  /* Use VMRGEW if we can instead of doing a permute.  */
6664 	  if (TARGET_P8_VECTOR)
6666 	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6667 	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6668 	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6669 	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6670 	      if (BYTES_BIG_ENDIAN)
6671 		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6673 		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6677 	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6678 	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6679 	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6680 	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6681 	      rs6000_expand_extract_even (target, flt_even, flt_odd);
6687   /* Special case initializing vector short/char that are splats if we are on
6688      64-bit systems with direct move.  */
6689   if (all_same && TARGET_DIRECT_MOVE_64BIT
6690       && (mode == V16QImode || mode == V8HImode))
6692       rtx op0 = XVECEXP (vals, 0, 0);
6693       rtx di_tmp = gen_reg_rtx (DImode);
6696 	op0 = force_reg (GET_MODE_INNER (mode), op0);
6698       if (mode == V16QImode)
6700 	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6701 	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6705       if (mode == V8HImode)
6707 	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6708 	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6713   /* Store value to stack temp.  Load vector element.  Splat.  However, splat
6714      of 64-bit items is not supported on Altivec.  */
6715   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6717       mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6718       emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6719 		      XVECEXP (vals, 0, 0));
6720       x = gen_rtx_UNSPEC (VOIDmode,
6721 			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6722       emit_insn (gen_rtx_PARALLEL (VOIDmode,
6724 				     gen_rtx_SET (target, mem),
/* Broadcast element 0 of TARGET to every lane via VEC_DUPLICATE.  */
6726       x = gen_rtx_VEC_SELECT (inner_mode, target,
6727 			      gen_rtx_PARALLEL (VOIDmode,
6728 						gen_rtvec (1, const0_rtx)));
6729       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6733   /* One field is non-constant.  Load constant then overwrite
6737       rtx copy = copy_rtx (vals);
6739       /* Load constant part of vector, substitute neighboring value for
6741       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6742       rs6000_expand_vector_init (target, copy);
6744       /* Insert variable.  */
6745       rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6749   /* Construct the vector in memory one field at a time
6750      and load the whole vector.  */
6751   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6752   for (i = 0; i < n_elts; i++)
6753     emit_move_insn (adjust_address_nv (mem, inner_mode,
6754 				    i * GET_MODE_SIZE (inner_mode)),
6755 		    XVECEXP (vals, 0, i));
6756   emit_move_insn (target, mem);
6759 /* Set field ELT of TARGET to VAL. */
/* NOTE(review): interim source lines are missing from this extract.  The
   visible code first tries the direct VSX "set element" patterns, then a
   lowpart move for single-element vectors, and finally a store/permute
   sequence that merges VAL into TARGET via a byte permute mask.  */
6762 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6764   machine_mode mode = GET_MODE (target);
6765   machine_mode inner_mode = GET_MODE_INNER (mode);
6766   rtx reg = gen_reg_rtx (mode);
6768   int width = GET_MODE_SIZE (inner_mode);
6771   val = force_reg (GET_MODE (val), val);
6773   if (VECTOR_MEM_VSX_P (mode))
6775       rtx insn = NULL_RTX;
6776       rtx elt_rtx = GEN_INT (elt);
6778       if (mode == V2DFmode)
6779 	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6781       else if (mode == V2DImode)
6782 	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
/* ISA 3.0 + 64-bit adds direct set patterns for the narrower modes too.  */
6784       else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6786 	  if (mode == V4SImode)
6787 	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6788 	  else if (mode == V8HImode)
6789 	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6790 	  else if (mode == V16QImode)
6791 	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6792 	  else if (mode == V4SFmode)
6793 	    insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6803   /* Simplify setting single element vectors like V1TImode.  */
6804   if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6806       emit_move_insn (target, gen_lowpart (mode, val));
6810   /* Load single variable value.  */
6811   mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6812   emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6813   x = gen_rtx_UNSPEC (VOIDmode,
6814 		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6815   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6817 				 gen_rtx_SET (reg, mem),
6820   /* Linear sequence.  */
6821   mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6822   for (i = 0; i < 16; ++i)
6823     XVECEXP (mask, 0, i) = GEN_INT (i);
6825   /* Set permute mask to insert element into target.  */
6826   for (i = 0; i < width; ++i)
6827     XVECEXP (mask, 0, elt*width + i)
6828       = GEN_INT (i + 0x10);
6829   x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6831   if (BYTES_BIG_ENDIAN)
6832     x = gen_rtx_UNSPEC (mode,
6833 			gen_rtvec (3, target, reg,
6834 				   force_reg (V16QImode, x)),
/* Little-endian: operand order and (below) the selector differ.  */
6838       if (TARGET_P9_VECTOR)
6839 	x = gen_rtx_UNSPEC (mode,
6840 			    gen_rtvec (3, reg, target,
6841 				       force_reg (V16QImode, x)),
6845 	  /* Invert selector.  We prefer to generate VNAND on P8 so
6846 	     that future fusion opportunities can kick in, but must
6847 	     generate VNOR elsewhere.  */
6848 	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6849 	  rtx iorx = (TARGET_P8_VECTOR
6850 		      ? gen_rtx_IOR (V16QImode, notx, notx)
6851 		      : gen_rtx_AND (V16QImode, notx, notx));
6852 	  rtx tmp = gen_reg_rtx (V16QImode);
6853 	  emit_insn (gen_rtx_SET (tmp, iorx));
6855 	  /* Permute with operands reversed and adjusted selector.  */
6856 	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6861   emit_insn (gen_rtx_SET (target, x));
6864 /* Extract field ELT from VEC into TARGET. */
/* NOTE(review): interim source lines are missing from this extract.  Three
   strategies are visible: constant-index VSX extract patterns, variable-index
   VSX extract patterns when 64-bit direct move is available, and a generic
   stack-temp load of the addressed element as the fallback.  */
6867 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6869   machine_mode mode = GET_MODE (vec);
6870   machine_mode inner_mode = GET_MODE_INNER (mode);
6873   if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6880 	  emit_move_insn (target, gen_lowpart (TImode, vec));
6883 	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6886 	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6889 	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6892 	  if (TARGET_DIRECT_MOVE_64BIT)
6894 	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6900 	  if (TARGET_DIRECT_MOVE_64BIT)
6902 	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6908 	  if (TARGET_DIRECT_MOVE_64BIT)
6910 	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6916   else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6917 	   && TARGET_DIRECT_MOVE_64BIT)
/* Variable element number: normalize it into a DImode register first.  */
6919       if (GET_MODE (elt) != DImode)
6921 	  rtx tmp = gen_reg_rtx (DImode);
6922 	  convert_move (tmp, elt, 0);
6925       else if (!REG_P (elt))
6926 	elt = force_reg (DImode, elt);
6931 	  emit_move_insn (target, gen_lowpart (TImode, vec));
6935 	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6939 	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6943 	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6947 	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6951 	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6955 	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6963   /* Allocate mode-sized buffer.  */
6964   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6966   emit_move_insn (mem, vec);
6967   if (CONST_INT_P (elt))
/* The modulo keeps an out-of-range constant index inside the buffer.  */
6969       int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6971       /* Add offset to field within buffer matching vector element.  */
6972       mem = adjust_address_nv (mem, inner_mode,
6973 			       modulo_elt * GET_MODE_SIZE (inner_mode));
6974       emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
/* Variable index: mask it to the element count, scale by the element size,
   and load from the computed address.  */
6978       unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6979       rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6980       rtx new_addr = gen_reg_rtx (Pmode);
6982       elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6984 	elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6985       new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6986       new_addr = change_address (mem, inner_mode, new_addr);
6987       emit_move_insn (target, new_addr);
6991 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6992    within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6993    temporary (BASE_TMP) to fixup the address. Return the new memory address
6994    that is valid for reads or writes to a given register (SCALAR_REG). */
/* NOTE(review): interim source lines are missing from this extract.  Flow:
   compute ELEMENT * scalar_size as ELEMENT_OFFSET (shifting into BASE_TMP
   for a variable element), fold it into the existing address shape
   (indirect, reg+const, reg+reg), then validate the result against the
   addressing modes the register class of SCALAR_REG supports, copying into
   BASE_TMP if the combined address is not directly usable.  */
6997 rs6000_adjust_vec_address (rtx scalar_reg,
7001 			   machine_mode scalar_mode)
7003   unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7004   rtx addr = XEXP (mem, 0);
7009   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
7010   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7012   /* Calculate what we need to add to the address to get the element
7014   if (CONST_INT_P (element))
7015     element_offset = GEN_INT (INTVAL (element) * scalar_size);
/* Variable element number: scale by shifting left log2(scalar_size).  */
7018       int byte_shift = exact_log2 (scalar_size);
7019       gcc_assert (byte_shift >= 0);
7021       if (byte_shift == 0)
7022 	element_offset = element;
7026 	  if (TARGET_POWERPC64)
7027 	    emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7029 	    emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7031 	  element_offset = base_tmp;
7035   /* Create the new address pointing to the element within the vector.  If we
7036      are adding 0, we don't have to change the address.  */
7037   if (element_offset == const0_rtx)
7040   /* A simple indirect address can be converted into a reg + offset
7042   else if (REG_P (addr) || SUBREG_P (addr))
7043     new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7045   /* Optimize D-FORM addresses with constant offset with a constant element, to
7046      include the element offset in the address directly.  */
7047   else if (GET_CODE (addr) == PLUS)
7049       rtx op0 = XEXP (addr, 0);
7050       rtx op1 = XEXP (addr, 1);
7053       gcc_assert (REG_P (op0) || SUBREG_P (op0));
7054       if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7056 	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7057 	  rtx offset_rtx = GEN_INT (offset);
/* Keep the folded constant in the address only if it fits a 16-bit D-form
   displacement (and is DS-form aligned for 8-byte scalars).  */
7059 	  if (IN_RANGE (offset, -32768, 32767)
7060 	      && (scalar_size < 8 || (offset & 0x3) == 0))
7061 	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7064 	      emit_move_insn (base_tmp, offset_rtx);
7065 	      new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7070 	  bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7071 	  bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7073 	  /* Note, ADDI requires the register being added to be a base
7074 	     register.  If the register was R0, load it up into the temporary
7077 	      && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7079 	      insn = gen_add3_insn (base_tmp, op1, element_offset);
7080 	      gcc_assert (insn != NULL_RTX);
7085 		   && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7087 	      insn = gen_add3_insn (base_tmp, element_offset, op1);
7088 	      gcc_assert (insn != NULL_RTX);
7094 	      emit_move_insn (base_tmp, op1);
7095 	      emit_insn (gen_add2_insn (base_tmp, element_offset));
7098 	  new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
/* Any other address form: materialize it in BASE_TMP and add the offset.  */
7104       emit_move_insn (base_tmp, addr);
7105       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7108   /* If we have a PLUS, we need to see whether the particular register class
7109      allows for D-FORM or X-FORM addressing.  */
7110   if (GET_CODE (new_addr) == PLUS)
7112       rtx op1 = XEXP (new_addr, 1);
7113       addr_mask_type addr_mask;
7114       unsigned int scalar_regno = reg_or_subregno (scalar_reg);
7116       gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
7117       if (INT_REGNO_P (scalar_regno))
7118 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7120       else if (FP_REGNO_P (scalar_regno))
7121 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7123       else if (ALTIVEC_REGNO_P (scalar_regno))
7124 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7129       if (REG_P (op1) || SUBREG_P (op1))
7130 	valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7132 	valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7135   else if (REG_P (new_addr) || SUBREG_P (new_addr))
7136     valid_addr_p = true;
7139     valid_addr_p = false;
/* Fall back to a plain register-indirect address via BASE_TMP.  */
7143       emit_move_insn (base_tmp, new_addr);
7144       new_addr = base_tmp;
7147   return change_address (mem, scalar_mode, new_addr);
7150 /* Split a variable vec_extract operation into the component instructions. */
/* NOTE(review): interim source lines are missing from this extract.  Three
   cases are visible: SRC in memory (load just the addressed element), SRC in
   a register with the ISA 3.0 VEXTU{B,H,W}{L,R}X forms available, and the
   generic register path that shifts the element to the top with VSLO and
   then moves/shifts it into DEST.  TMP_GPR and TMP_ALTIVEC are scratch
   registers supplied by the caller.  */
7153 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7156   machine_mode mode = GET_MODE (src);
7157   machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7158   unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7159   int byte_shift = exact_log2 (scalar_size);
7161   gcc_assert (byte_shift >= 0);
7163   /* If we are given a memory address, optimize to load just the element.  We
7164      don't have to adjust the vector element number on little endian
7168       int num_elements = GET_MODE_NUNITS (mode);
7169       rtx num_ele_m1 = GEN_INT (num_elements - 1);
7171       emit_insn (gen_anddi3 (element, element, num_ele_m1));
7172       gcc_assert (REG_P (tmp_gpr));
7173       emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7174 						       tmp_gpr, scalar_mode));
7178   else if (REG_P (src) || SUBREG_P (src))
7180       int num_elements = GET_MODE_NUNITS (mode);
7181       int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7182       int bit_shift = 7 - exact_log2 (num_elements);
7184       unsigned int dest_regno = reg_or_subregno (dest);
7185       unsigned int src_regno = reg_or_subregno (src);
7186       unsigned int element_regno = reg_or_subregno (element);
7188       gcc_assert (REG_P (tmp_gpr));
7190       /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7191 	 a general purpose register.  */
7192       if (TARGET_P9_VECTOR
7193 	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7194 	  && INT_REGNO_P (dest_regno)
7195 	  && ALTIVEC_REGNO_P (src_regno)
7196 	  && INT_REGNO_P (element_regno))
7198 	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7199 	  rtx element_si = gen_rtx_REG (SImode, element_regno);
7201 	  if (mode == V16QImode)
7202 	    emit_insn (BYTES_BIG_ENDIAN
7203 		       ? gen_vextublx (dest_si, element_si, src)
7204 		       : gen_vextubrx (dest_si, element_si, src));
/* Halfword/word forms take a byte offset: scale the element index first.  */
7206 	  else if (mode == V8HImode)
7208 	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7209 	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7210 	      emit_insn (BYTES_BIG_ENDIAN
7211 			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7212 			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7218 	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7219 	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7220 	      emit_insn (BYTES_BIG_ENDIAN
7221 			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7222 			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7229       gcc_assert (REG_P (tmp_altivec));
7231       /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
7232 	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
7233 	 will shift the element into the upper position (adding 3 to convert a
7234 	 byte shift into a bit shift).  */
7235       if (scalar_size == 8)
7237 	  if (!BYTES_BIG_ENDIAN)
7239 	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7245 	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7247 	  emit_insn (gen_rtx_SET (tmp_gpr,
7248 				  gen_rtx_AND (DImode,
7249 					       gen_rtx_ASHIFT (DImode,
7256 	  if (!BYTES_BIG_ENDIAN)
7258 	      rtx num_ele_m1 = GEN_INT (num_elements - 1);
7260 	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7261 	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7267 	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7270       /* Get the value into the lower byte of the Altivec register where VSLO
7272       if (TARGET_P9_VECTOR)
7273 	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7274       else if (can_create_pseudo_p ())
7275 	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
/* No pseudo available (e.g. after reload): move through a DImode view of
   TMP_ALTIVEC instead.  */
7278 	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7279 	  emit_move_insn (tmp_di, tmp_gpr);
7280 	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7283       /* Do the VSLO to get the value into the final location.  */
7287 	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7291 	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
/* SFmode result: VSLO on a V2DI view, then convert the top single.  */
7296 	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7297 	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7298 	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7299 	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7302 	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
/* Integer result narrower than 64 bits: VSLO, move to a GPR, then shift
   the element down from the high-order bits.  */
7310 	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7311 	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7312 	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7313 	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7315 	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7316 	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7317 				    GEN_INT (64 - bits_in_element)));
7331 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7332    selects whether the alignment is abi mandated, optional, or
7333    both abi and optional alignment. */
/* NOTE(review): some source lines are missing from this extract.  Visible
   behavior: vectors get at least 128-bit (ABI-mandated) alignment, and
   char arrays get word alignment as an optional optimization.  */
7336 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7338   if (how != align_opt)
7340       if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7344   if (how != align_abi)
7346       if (TREE_CODE (type) == ARRAY_TYPE
7347 	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
/* Word-aligning char arrays speeds up block moves/compares; optional only.  */
7349 	  if (align < BITS_PER_WORD)
7350 	    align = BITS_PER_WORD;
7357 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7358    instructions simply ignore the low bits; VSX memory instructions
7359    are aligned to 4 or 8 bytes. */
/* Returns true when an unaligned access of MODE at alignment ALIGN (bits)
   should be treated as slow: always under -mstrict-align, otherwise for
   under-aligned scalar floats and vectors unless the target has efficient
   unaligned VSX accesses.  */
7362 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7364   return (STRICT_ALIGNMENT
7365 	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
7366 	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7367 		  || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7368 		      && (int) align < VECTOR_ALIGN (mode)))));
7371 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
/* NOTE(review): some source lines are missing from this extract.  The visible
   code emits a one-time -Wpsabi note when a vector field's computed alignment
   differs from 128 bits, since aggregate layout changed in GCC 5.  */
7374 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7376   if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7378       if (computed != 128)
/* Warn only once per compilation, and only when -Wpsabi is active.  */
7381 	  if (!warned && warn_psabi)
7384 	      inform (input_location,
7385 		      "the layout of aggregates containing vectors with"
7386 		      " %d-byte alignment has changed in GCC 5",
7387 		      computed / BITS_PER_UNIT);
7390       /* In current GCC there is no special case.  */
7397 /* AIX increases natural record alignment to doubleword if the first
7398    field is an FP double while the FP fields remain word aligned. */
/* NOTE(review): some source lines are missing from this extract.  COMPUTED
   and SPECIFIED are candidate alignments in bits; the result is their max,
   raised to 64 when the record's first field (looking through arrays) has
   DFmode.  */
7401 rs6000_special_round_type_align (tree type, unsigned int computed,
7402 				 unsigned int specified)
7404   unsigned int align = MAX (computed, specified);
7405   tree field = TYPE_FIELDS (type);
7407   /* Skip all non field decls */
7408   while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7409     field = DECL_CHAIN (field);
7411   if (field != NULL && field != type)
7413       type = TREE_TYPE (field);
/* Look through any array dimensions to the element type.  */
7414       while (TREE_CODE (type) == ARRAY_TYPE)
7415 	type = TREE_TYPE (type);
7417       if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7418 	align = MAX (align, 64);
7424 /* Darwin increases record alignment to the natural alignment of
/* NOTE(review): some source lines are missing from this extract.  The visible
   code walks to the record's first real field, descending through nested
   aggregates and arrays, and raises the alignment to that leaf type's
   natural alignment — unless the type or the field is packed.  */
7428 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7429 					unsigned int specified)
7431   unsigned int align = MAX (computed, specified);
7433   if (TYPE_PACKED (type))
7436   /* Find the first field, looking down into aggregates.  */
7438       tree field = TYPE_FIELDS (type);
7439       /* Skip all non field decls */
7440       while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7441 	field = DECL_CHAIN (field);
7444       /* A packed field does not contribute any extra alignment.  */
7445       if (DECL_PACKED (field))
7447       type = TREE_TYPE (field);
7448       while (TREE_CODE (type) == ARRAY_TYPE)
7449 	type = TREE_TYPE (type);
7450     } while (AGGREGATE_TYPE_P (type));
7452   if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7453     align = MAX (align, TYPE_ALIGN (type));
7458 /* Return 1 for an operand in small memory on V.4/eabi. */
/* NOTE(review): some source lines are missing from this extract.  OP must be
   a SYMBOL_REF, or (const (plus symbol_ref const_int)) whose offset keeps the
   referenced address within the -G (g_switch_value) window of _SDA_BASE_;
   only meaningful under the V.4 ABI with small data enabled.  */
7461 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7462 		    machine_mode mode ATTRIBUTE_UNUSED)
7467   if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7470   if (DEFAULT_ABI != ABI_V4)
7473   if (SYMBOL_REF_P (op))
7476   else if (GET_CODE (op) != CONST
7477 	   || GET_CODE (XEXP (op, 0)) != PLUS
7478 	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7479 	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7484       rtx sum = XEXP (op, 0);
7485       HOST_WIDE_INT summand;
7487       /* We have to be careful here, because it is the referenced address
7488 	 that must be 32k from _SDA_BASE_, not just the symbol.  */
7489       summand = INTVAL (XEXP (sum, 1));
7490       if (summand < 0 || summand > g_switch_value)
7493       sym_ref = XEXP (sum, 0);
7496   return SYMBOL_REF_SMALL_P (sym_ref);
7502 /* Return true if either operand is a general purpose register. */
/* Non-register operands (MEM, constants, ...) fail both REG_P tests.  */
7505 gpr_or_gpr_p (rtx op0, rtx op1)
7507   return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7508 	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7511 /* Return true if this is a move direct operation between GPR registers and
7512    floating point/VSX registers. */
/* NOTE(review): some source lines are missing from this extract.  Requires
   both operands to be hard registers; the GPR side is paired with a VSX
   register under -mdirect-move, or an FP register under -mmfpgpr.  */
7515 direct_move_p (rtx op0, rtx op1)
7519   if (!REG_P (op0) || !REG_P (op1))
7522   if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7525   regno0 = REGNO (op0);
7526   regno1 = REGNO (op1);
7527   if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7530   if (INT_REGNO_P (regno0))
7531     return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7533   else if (INT_REGNO_P (regno1))
7535       if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7538       else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7545 /* Return true if the OFFSET is valid for the quad address instructions that
7546    use d-form (register + offset) addressing. */
/* DQ-form displacements are signed 16-bit and must be 16-byte aligned.  */
7549 quad_address_offset_p (HOST_WIDE_INT offset)
7551   return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
7554 /* Return true if the ADDR is an acceptable address for a quad memory
7555    operation of mode MODE (either LQ/STQ for general purpose registers, or
7556    LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7557    is intended for LQ/STQ. If it is false, the address is intended for the ISA
7558    3.0 LXV/STXV instruction. */
/* NOTE(review): some source lines are missing from this extract; the GPR_P
   parameter mentioned above is not visible in the fragment.  Accepts a plain
   register-indirect address, or base-register + constant offset that passes
   quad_address_offset_p.  */
7561 quad_address_p (rtx addr, machine_mode mode, bool strict)
7565   if (GET_MODE_SIZE (mode) != 16)
7568   if (legitimate_indirect_address_p (addr, strict))
7571   if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7574   if (GET_CODE (addr) != PLUS)
7577   op0 = XEXP (addr, 0);
7578   if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7581   op1 = XEXP (addr, 1);
7582   if (!CONST_INT_P (op1))
7585   return quad_address_offset_p (INTVAL (op1));
7588 /* Return true if this is a load or store quad operation. This function does
7589    not handle the atomic quad memory instructions. */
/* NOTE(review): some source lines are missing from this extract.  One operand
   must be an even GPR pair (quad_int_reg_operand) and the other a quad memory
   operand; for loads the destination must not overlap the address.  */
7592 quad_load_store_p (rtx op0, rtx op1)
7596   if (!TARGET_QUAD_MEMORY)
7599   else if (REG_P (op0) && MEM_P (op1))
7600     ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7601 	   && quad_memory_operand (op1, GET_MODE (op1))
7602 	   && !reg_overlap_mentioned_p (op0, op1));
7604   else if (MEM_P (op0) && REG_P (op1))
7605     ret = (quad_memory_operand (op0, GET_MODE (op0))
7606 	   && quad_int_reg_operand (op1, GET_MODE (op1)));
/* -mdebug=addr trace of the decision.  */
7611   if (TARGET_DEBUG_ADDR)
7613       fprintf (stderr, "\n========== quad_load_store, return %s\n",
7614 	       ret ? "true" : "false");
7615       debug_rtx (gen_rtx_SET (op0, op1));
7621 /* Given an address, return a constant offset term if one exists. */
/* NOTE(review): some source lines are missing from this extract.  The visible
   code strips auto-increment, PRE_MODIFY/LO_SUM, CONST and PLUS wrappers
   before testing for a CONST_INT; the missing lines presumably return the
   CONST_INT or NULL_RTX — confirm against the full source.  */
7624 address_offset (rtx op)
7626   if (GET_CODE (op) == PRE_INC
7627       || GET_CODE (op) == PRE_DEC)
7629   else if (GET_CODE (op) == PRE_MODIFY
7630 	   || GET_CODE (op) == LO_SUM)
7633   if (GET_CODE (op) == CONST)
7636   if (GET_CODE (op) == PLUS)
7639   if (CONST_INT_P (op))
7645 /* Return true if the MEM operand is a memory operand suitable for use
7646    with a (full width, possibly multiple) gpr load/store. On
7647    powerpc64 this means the offset must be divisible by 4.
7648    Implements 'Y' constraint.
7650    Accept direct, indexed, offset, lo_sum and tocref. Since this is
7651    a constraint function we know the operand has satisfied a suitable
7654    Offsetting a lo_sum should not be allowed, except where we know by
7655    alignment that a 32k boundary is not crossed. Note that by
7656    "offsetting" here we mean a further offset to access parts of the
7657    MEM. It's fine to have a lo_sum where the inner address is offset
7658    from a sym, since the same sym+offset will appear in the high part
7659    of the address calculation. */
/* NOTE(review): some source lines are missing from this extract.  */
7662 mem_operand_gpr (rtx op, machine_mode mode)
7664   unsigned HOST_WIDE_INT offset;
7666   rtx addr = XEXP (op, 0);
7668   /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
7670       && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7671       && mode_supports_pre_incdec_p (mode)
7672       && legitimate_indirect_address_p (XEXP (addr, 0), false))
7675   /* Don't allow non-offsettable addresses.  See PRs 83969 and 84279.  */
7676   if (!rs6000_offsettable_memref_p (op, mode, false))
7679   op = address_offset (addr);
7683   offset = INTVAL (op);
/* 64-bit ld/std are DS-form: the displacement must be a multiple of 4.  */
7684   if (TARGET_POWERPC64 && (offset & 3) != 0)
7687   extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7691   if (GET_CODE (addr) == LO_SUM)
7692     /* For lo_sum addresses, we must allow any offset except one that
7693        causes a wrap, so test only the low 16 bits.  */
7694     offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
/* Whole multi-word access (OFFSET .. OFFSET+EXTRA) must stay in 16-bit
   displacement range.  */
7696   return offset + 0x8000 < 0x10000u - extra;
7699 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7700    enforce an offset divisible by 4 even for 32-bit. */
/* NOTE(review): some source lines are missing from this extract; mirrors
   mem_operand_gpr but the multiple-of-4 check is unconditional.  */
7703 mem_operand_ds_form (rtx op, machine_mode mode)
7705   unsigned HOST_WIDE_INT offset;
7707   rtx addr = XEXP (op, 0);
7709   if (!offsettable_address_p (false, mode, addr))
7712   op = address_offset (addr);
7716   offset = INTVAL (op);
7717   if ((offset & 3) != 0)
7720   extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7724   if (GET_CODE (addr) == LO_SUM)
7725     /* For lo_sum addresses, we must allow any offset except one that
7726        causes a wrap, so test only the low 16 bits.  */
7727     offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7729   return offset + 0x8000 < 0x10000u - extra;
7732 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
/* NOTE(review): some source lines are missing from this extract (including
   this function's own header comment and several case arms).  Returns
   whether MODE permits reg+offset (D-form) addressing at all, as opposed to
   reg+reg only.  */
7735 reg_offset_addressing_ok_p (machine_mode mode)
7749       /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
7750 	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
7751 	 a vector mode, if we want to use the VSX registers to move it around,
7752 	 we need to restrict ourselves to reg+reg addressing.  Similarly for
7753 	 IEEE 128-bit floating point that is passed in a single vector
7755       if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7756 	return mode_supports_dq_form (mode);
7760       /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7761 	 addressing for the LFIWZX and STFIWX instructions.  */
7762       if (TARGET_NO_SDMODE_STACK)
/* NOTE(review): this function's return-type line and header comment are
   missing from the extract.  Returns whether OP is a (possibly offset)
   reference based on one of the virtual stack/arg-pointer registers, i.e.
   an address that will be instantiated later.  */
7774 virtual_stack_registers_memory_p (rtx op)
7779     regnum = REGNO (op);
7781   else if (GET_CODE (op) == PLUS
7782 	   && REG_P (XEXP (op, 0))
7783 	   && CONST_INT_P (XEXP (op, 1)))
7784     regnum = REGNO (XEXP (op, 0));
7789   return (regnum >= FIRST_VIRTUAL_REGISTER
7790 	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7793 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7794    is known to not straddle a 32k boundary. This function is used
7795    to determine whether -mcmodel=medium code can use TOC pointer
7796    relative addressing for OP. This means the alignment of the TOC
7797    pointer must also be taken into account, and unfortunately that is
/* NOTE(review): some source lines are missing from this extract.  The logic
   derives the datum's size and alignment from the symbol's block, constant
   pool entry, or decl, caps the usable alignment at the guaranteed TOC
   pointer alignment, and compares against the access footprint.  */
7800 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7801 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7805 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7809   unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7811   if (!SYMBOL_REF_P (op))
7814   /* ISA 3.0 vector d-form addressing is restricted, don't allow
7816   if (mode_supports_dq_form (mode))
7819   dsize = GET_MODE_SIZE (mode);
7820   decl = SYMBOL_REF_DECL (op);
7826       /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7827 	 replacing memory addresses with an anchor plus offset.  We
7828 	 could find the decl by rummaging around in the block->objects
7829 	 VEC for the given offset but that seems like too much work.  */
7830       dalign = BITS_PER_UNIT;
7831       if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7832 	  && SYMBOL_REF_ANCHOR_P (op)
7833 	  && SYMBOL_REF_BLOCK (op) != NULL)
7835 	  struct object_block *block = SYMBOL_REF_BLOCK (op);
7837 	  dalign = block->alignment;
7838 	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
7840   else if (CONSTANT_POOL_ADDRESS_P (op))
7842       /* It would be nice to have get_pool_align()..  */
7843       machine_mode cmode = get_pool_mode (op);
7845       dalign = GET_MODE_ALIGNMENT (cmode);
7848   else if (DECL_P (decl))
7850       dalign = DECL_ALIGN (decl);
7854 	  /* Allow BLKmode when the entire object is known to not
7855 	     cross a 32k boundary.  */
7856 	  if (!DECL_SIZE_UNIT (decl))
7859 	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7862 	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
/* Bits -> bytes, capped at the guaranteed TOC pointer alignment.  */
7866 	  dalign /= BITS_PER_UNIT;
7867 	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7868 	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7869 	  return dalign >= dsize;
7875   /* Find how many bits of the alignment we know for this access.  */
7876   dalign /= BITS_PER_UNIT;
7877   if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7878     dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
/* LSB isolates the lowest set bit of OFFSET, i.e. the offset's own
   alignment contribution.  */
7880   lsb = offset & -offset;
7884   return dalign >= dsize;
/* NOTE(review): original leading comment elided from this listing.
   Return true if OP (a symbol, possibly plus a constant offset) refers
   to a constant-pool entry, and that entry qualifies for the special
   pool handling (presumably TOC placement — confirm against
   ASM_OUTPUT_SPECIAL_POOL_ENTRY_P's definition).  */
7888 constant_pool_expr_p (rtx op)
7892 split_const (op, &base, &offset);
7893 return (SYMBOL_REF_P (base)
7894 && CONSTANT_POOL_ADDRESS_P (base)
7895 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7898 /* These are only used to pass through from print_operand/print_operand_address
7899 to rs6000_output_addr_const_extra over the intervening function
7900 output_addr_const which is not target code. */
7901 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7903 /* Return true if OP is a toc pointer relative address (the output
7904 of create_TOC_reference). If STRICT, do not match non-split
7905 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7906 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7907 TOCREL_OFFSET_RET respectively. */
7910 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7911 const_rtx *tocrel_offset_ret)
/* NOTE(review): some original lines are elided below; code unchanged.  */
7916 if (TARGET_CMODEL != CMODEL_SMALL)
7918 /* When strict ensure we have everything tidy. */
7920 && !(GET_CODE (op) == LO_SUM
7921 && REG_P (XEXP (op, 0))
7922 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7925 /* When not strict, allow non-split TOC addresses and also allow
7926 (lo_sum (high ..)) TOC addresses created during reload. */
7927 if (GET_CODE (op) == LO_SUM)
7931 const_rtx tocrel_base = op;
7932 const_rtx tocrel_offset = const0_rtx;
/* Peel a constant addend off (plus base cint) before matching.  */
7934 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7936 tocrel_base = XEXP (op, 0);
7937 tocrel_offset = XEXP (op, 1);
7940 if (tocrel_base_ret)
7941 *tocrel_base_ret = tocrel_base;
7942 if (tocrel_offset_ret)
7943 *tocrel_offset_ret = tocrel_offset;
/* Match (unspec [sym toc-reg] UNSPEC_TOCREL) with the TOC register.  */
7945 return (GET_CODE (tocrel_base) == UNSPEC
7946 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7947 && REG_P (XVECEXP (tocrel_base, 0, 1))
7948 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7951 /* Return true if X is a constant pool address, and also for cmodel=medium
7952 if X is a toc-relative address known to be offsettable within MODE. */
7955 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
/* For CMODEL_MEDIUM, a non-pool symbol is only OK when the access is
   proven not to straddle a 32k boundary (see
   offsettable_ok_by_alignment).  */
7958 const_rtx tocrel_base, tocrel_offset;
7959 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7960 && (TARGET_CMODEL != CMODEL_MEDIUM
7961 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7963 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7964 INTVAL (tocrel_offset), mode)));
/* NOTE(review): original leading comment elided.  True if X is a
   small-data-section reference usable with V4 ABI non-PIC, non-TOC
   addressing.  */
7968 legitimate_small_data_p (machine_mode mode, rtx x)
7970 return (DEFAULT_ABI == ABI_V4
7971 && !flag_pic && !TARGET_TOC
7972 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7973 && small_data_operand (x, mode));
/* NOTE(review): original leading comment and several body lines elided
   from this listing; code unchanged.  Validates reg+const-offset
   addresses for MODE; WORST_CASE presumably selects conservative
   register-class assumptions (set on elided lines) — TODO confirm.  */
7977 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7978 bool strict, bool worst_case)
7980 unsigned HOST_WIDE_INT offset;
7983 if (GET_CODE (x) != PLUS)
7985 if (!REG_P (XEXP (x, 0)))
7987 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7989 if (mode_supports_dq_form (mode))
7990 return quad_address_p (x, mode, strict);
7991 if (!reg_offset_addressing_ok_p (mode))
7992 return virtual_stack_registers_memory_p (x);
7993 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7995 if (!CONST_INT_P (XEXP (x, 1)))
7998 offset = INTVAL (XEXP (x, 1));
8005 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8007 if (VECTOR_MEM_VSX_P (mode))
/* Multi-register cases: offsets must be word-aligned on 32-bit.  */
8012 if (!TARGET_POWERPC64)
8014 else if (offset & 3)
8027 if (!TARGET_POWERPC64)
8029 else if (offset & 3)
/* Final range check: 16-bit signed displacement minus EXTRA slack.  */
8038 return offset < 0x10000 - extra;
/* NOTE(review): original leading comment elided.  True if X is a
   reg+reg (indexed) address; either operand order is accepted.  */
8042 legitimate_indexed_address_p (rtx x, int strict)
8046 if (GET_CODE (x) != PLUS)
8052 return (REG_P (op0) && REG_P (op1)
8053 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8054 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8055 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8056 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
/* NOTE(review): returns whether -mavoid-indexed-addresses applies to
   MODE (only modes with non-indexed load/store forms).  */
8060 avoiding_indexed_address_p (machine_mode mode)
8062 /* Avoid indexed addressing for modes that have non-indexed
8063 load/store instruction forms. */
8064 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
/* NOTE(review): true if X is a plain register-indirect address.  */
8068 legitimate_indirect_address_p (rtx x, int strict)
8070 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
/* NOTE(review): Darwin-only predicate; accepts an SImode MEM whose
   address is (lo_sum base constant) under -fpic.  Some body lines are
   elided from this listing.  */
8074 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8076 if (!TARGET_MACHO || !flag_pic
8077 || mode != SImode || !MEM_P (x))
8081 if (GET_CODE (x) != LO_SUM)
8083 if (!REG_P (XEXP (x, 0)))
8085 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8089 return CONSTANT_P (x);
/* NOTE(review): original leading comment and some body lines elided;
   code unchanged.  Validates (lo_sum reg constant) addresses for MODE,
   with special-case acceptance of large-TOC references during LRA.  */
8093 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8095 if (GET_CODE (x) != LO_SUM)
8097 if (!REG_P (XEXP (x, 0)))
8099 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8101 /* quad word addresses are restricted, and we can't use LO_SUM. */
8102 if (mode_supports_dq_form (mode))
8106 if (TARGET_ELF || TARGET_MACHO)
8110 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8112 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8113 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8114 recognizes some LO_SUM addresses as valid although this
8115 function says opposite. In most cases, LRA through different
8116 transformations can generate correct code for address reloads.
8117 It cannot manage only some LO_SUM cases. So we need to add
8118 code here saying that some addresses are still valid. */
8119 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8120 && small_toc_ref (x, VOIDmode));
8121 if (TARGET_TOC && ! large_toc_ok)
8123 if (GET_MODE_NUNITS (mode) != 1)
8125 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8126 && !(/* ??? Assume floating point reg based on mode? */
8127 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8130 return CONSTANT_P (x) || large_toc_ok;
8137 /* Try machine-dependent ways of modifying an illegitimate address
8138 to be legitimate. If we find one, return the new, valid address.
8139 This is used from only one place: `memory_address' in explow.c.
8141 OLDX is the address as it was before break_out_memory_refs was
8142 called. In some cases it is useful to look at this to decide what
8145 It is always safe for this function to do nothing. It exists to
8146 recognize opportunities to optimize the output.
8148 On RS/6000, first check for the sum of a register with a constant
8149 integer that is out of range. If so, generate code to add the
8150 constant with the low-order 16 bits masked to the register and force
8151 this result into another register (this can be done with `cau').
8152 Then generate an address of REG+(CONST&0xffff), allowing for the
8153 possibility of bit 16 being a one.
8155 Then check for the sum of a register and something not constant, try to
8156 load the other things into a register and return the sum. */
/* NOTE(review): many original lines are elided in this listing; code is
   left byte-identical, comments only.  */
8159 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
/* Modes without reg+offset addressing (and dq-form modes) get only
   register-based forms.  */
8164 if (!reg_offset_addressing_ok_p (mode)
8165 || mode_supports_dq_form (mode))
8167 if (virtual_stack_registers_memory_p (x))
8170 /* In theory we should not be seeing addresses of the form reg+0,
8171 but just in case it is generated, optimize it away. */
8172 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8173 return force_reg (Pmode, XEXP (x, 0));
8175 /* For TImode with load/store quad, restrict addresses to just a single
8176 pointer, so it works with both GPRs and VSX registers. */
8177 /* Make sure both operands are registers. */
8178 else if (GET_CODE (x) == PLUS
8179 && (mode != TImode || !TARGET_VSX))
8180 return gen_rtx_PLUS (Pmode,
8181 force_reg (Pmode, XEXP (x, 0)),
8182 force_reg (Pmode, XEXP (x, 1)));
8184 return force_reg (Pmode, x);
/* TLS symbols are handled by the dedicated TLS legitimizer.  */
8186 if (SYMBOL_REF_P (x))
8188 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8190 return rs6000_legitimize_tls_address (x, model);
8202 /* As in legitimate_offset_address_p we do not assume
8203 worst-case. The mode here is just a hint as to the registers
8204 used. A TImode is usually in gprs, but may actually be in
8205 fprs. Leave worst-case scenario for reload to handle via
8206 insn constraints. PTImode is only GPRs. */
/* Case 1: reg + out-of-range constant.  Split the constant into a
   high part added into a register and a signed 16-bit low part.  */
8213 if (GET_CODE (x) == PLUS
8214 && REG_P (XEXP (x, 0))
8215 && CONST_INT_P (XEXP (x, 1))
8216 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8217 >= 0x10000 - extra))
8219 HOST_WIDE_INT high_int, low_int;
/* Sign-extend the low 16 bits of the offset.  */
8221 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8222 if (low_int >= 0x8000 - extra)
8224 high_int = INTVAL (XEXP (x, 1)) - low_int;
8225 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8226 GEN_INT (high_int)), 0);
8227 return plus_constant (Pmode, sum, low_int);
/* Case 2: reg + non-constant -> force the addend into a register so
   the result is a legitimate indexed address.  */
8229 else if (GET_CODE (x) == PLUS
8230 && REG_P (XEXP (x, 0))
8231 && !CONST_INT_P (XEXP (x, 1))
8232 && GET_MODE_NUNITS (mode) == 1
8233 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8234 || (/* ??? Assume floating point reg based on mode? */
8235 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8236 && !avoiding_indexed_address_p (mode))
8238 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8239 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
/* Case 3: ELF/Mach-O symbolic constant -> high/lo_sum pair.  */
8241 else if ((TARGET_ELF
8243 || !MACHO_DYNAMIC_NO_PIC_P
8250 && !CONST_WIDE_INT_P (x)
8251 && !CONST_DOUBLE_P (x)
8253 && GET_MODE_NUNITS (mode) == 1
8254 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8255 || (/* ??? Assume floating point reg based on mode? */
8256 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8258 rtx reg = gen_reg_rtx (Pmode);
8260 emit_insn (gen_elf_high (reg, x));
8262 emit_insn (gen_macho_high (reg, x));
8263 return gen_rtx_LO_SUM (Pmode, reg, x);
/* Case 4: TOC-eligible constant pool reference.  */
8267 && constant_pool_expr_p (x)
8268 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8269 return create_TOC_reference (x, NULL_RTX);
8274 /* Debug version of rs6000_legitimize_address. */
/* NOTE(review): wraps rs6000_legitimize_address and dumps before/after
   RTL plus any emitted insns to stderr; some lines elided here.  */
8276 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8282 ret = rs6000_legitimize_address (x, oldx, mode);
8283 insns = get_insns ();
8289 "\nrs6000_legitimize_address: mode %s, old code %s, "
8290 "new code %s, modified\n",
8291 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8292 GET_RTX_NAME (GET_CODE (ret)));
8294 fprintf (stderr, "Original address:\n");
8297 fprintf (stderr, "oldx:\n");
8300 fprintf (stderr, "New address:\n");
8305 fprintf (stderr, "Insns added:\n");
8306 debug_rtx_list (insns, 20);
8312 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8313 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8324 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8325 We need to emit DTP-relative relocations. */
8327 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
/* NOTE(review): emits a SIZE-byte DTP-relative value for X; on XCOFF a
   TLS-model suffix (@le/@ie/...) is appended instead of @dtprel.  Some
   lines (size switch, break statements) are elided in this listing.  */
8329 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8334 fputs ("\t.long\t", file);
8337 fputs (DOUBLE_INT_ASM_OP, file);
8342 output_addr_const (file, x);
8344 fputs ("@dtprel+0x8000", file);
8345 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8347 switch (SYMBOL_REF_TLS_MODEL (x))
8351 case TLS_MODEL_LOCAL_EXEC:
8352 fputs ("@le", file);
8354 case TLS_MODEL_INITIAL_EXEC:
8355 fputs ("@ie", file);
8357 case TLS_MODEL_GLOBAL_DYNAMIC:
8358 case TLS_MODEL_LOCAL_DYNAMIC:
8367 /* Return true if X is a symbol that refers to real (rather than emulated)
8371 rs6000_real_tls_symbol_ref_p (rtx x)
8373 return (SYMBOL_REF_P (x)
8374 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8377 /* In the name of slightly smaller debug output, and to cater to
8378 general assembler lossage, recognize various UNSPEC sequences
8379 and turn them back into a direct symbol reference. */
/* NOTE(review): several original lines are elided below (the binding of
   Y from ORIG_X among them); code left byte-identical.  */
8382 rs6000_delegitimize_address (rtx orig_x)
8386 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8387 orig_x = XVECEXP (orig_x, 0, 0);
8389 orig_x = delegitimize_mem_from_attrs (orig_x);
8396 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
/* Strip a constant offset so the UNSPEC_TOCREL below is exposed.  */
8400 if (GET_CODE (y) == PLUS
8401 && GET_MODE (y) == Pmode
8402 && CONST_INT_P (XEXP (y, 1)))
8404 offset = XEXP (y, 1);
8408 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8410 y = XVECEXP (y, 0, 0);
8413 /* Do not associate thread-local symbols with the original
8414 constant pool symbol. */
8417 && CONSTANT_POOL_ADDRESS_P (y)
8418 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8422 if (offset != NULL_RTX)
8423 y = gen_rtx_PLUS (Pmode, y, offset);
8424 if (!MEM_P (orig_x))
8427 return replace_equiv_address_nv (orig_x, y);
/* Darwin PIC offset unspec case.  */
8431 && GET_CODE (orig_x) == LO_SUM
8432 && GET_CODE (XEXP (orig_x, 1)) == CONST
8434 y = XEXP (XEXP (orig_x, 1), 0);
8435 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8436 return XVECEXP (y, 0, 0);
8442 /* Return true if X shouldn't be emitted into the debug info.
8443 The linker doesn't like .toc section references from
8444 .debug_* sections, so reject .toc section symbols. */
8447 rs6000_const_not_ok_for_debug_p (rtx x)
/* Any UNSPEC is unrepresentable in debug info.  */
8449 if (GET_CODE (x) == UNSPEC)
8451 if (SYMBOL_REF_P (x)
8452 && CONSTANT_POOL_ADDRESS_P (x))
8454 rtx c = get_pool_constant (x);
8455 machine_mode cmode = get_pool_mode (x);
8456 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8463 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8466 rs6000_legitimate_combined_insn (rtx_insn *insn)
8468 int icode = INSN_CODE (insn);
8470 /* Reject creating doloop insns. Combine should not be allowed
8471 to create these for a number of reasons:
8472 1) In a nested loop, if combine creates one of these in an
8473 outer loop and the register allocator happens to allocate ctr
8474 to the outer loop insn, then the inner loop can't use ctr.
8475 Inner loops ought to be more highly optimized.
8476 2) Combine often wants to create one of these from what was
8477 originally a three insn sequence, first combining the three
8478 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8479 allocated ctr, the splitter takes use back to the three insn
8480 sequence. It's better to stop combine at the two insn
8482 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8483 insns, the register allocator sometimes uses floating point
8484 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8485 jump insn and output reloads are not implemented for jumps,
8486 the ctrsi/ctrdi splitters need to handle all possible cases.
8487 That's a pain, and it gets to be seriously difficult when a
8488 splitter that runs after reload needs memory to transfer from
8489 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8490 for the difficult case. It's better to not create problems
8491 in the first place. */
/* Reject all branch-decrement (doloop) insn codes.  */
8492 if (icode != CODE_FOR_nothing
8493 && (icode == CODE_FOR_bdz_si
8494 || icode == CODE_FOR_bdz_di
8495 || icode == CODE_FOR_bdnz_si
8496 || icode == CODE_FOR_bdnz_di
8497 || icode == CODE_FOR_bdztf_si
8498 || icode == CODE_FOR_bdztf_di
8499 || icode == CODE_FOR_bdnztf_si
8500 || icode == CODE_FOR_bdnztf_di))
8506 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8508 static GTY(()) rtx rs6000_tls_symbol;
/* Lazily create and cache the __tls_get_addr libfunc symbol.  */
8510 rs6000_tls_get_addr (void)
8512 if (!rs6000_tls_symbol)
8513 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8515 return rs6000_tls_symbol;
8518 /* Construct the SYMBOL_REF for TLS GOT references. */
8520 static GTY(()) rtx rs6000_got_symbol;
/* Lazily create and cache _GLOBAL_OFFSET_TABLE_, marked both local and
   external (matches how other ports treat this linker-defined symbol).  */
8522 rs6000_got_sym (void)
8524 if (!rs6000_got_symbol)
8526 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8527 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8528 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8531 return rs6000_got_symbol;
8534 /* AIX Thread-Local Address support. */
/* NOTE(review): legitimize ADDR (a TLS SYMBOL_REF) for AIX/XCOFF: the
   symbol is given a [TL]/[UL] CSECT qualifier, placed in the TOC, and
   the TLS address is formed either via __tls_get_addr (dynamic models)
   or by adding the TOC value to the thread pointer.  Many original
   lines are elided in this listing; code left byte-identical.  */
8537 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8539 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8543 name = XSTR (addr, 0);
8544 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8545 or the symbol will be in TLS private data section. */
8546 if (name[strlen (name) - 1] != ']'
8547 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8548 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8550 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8551 strcpy (tlsname, name);
8553 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8554 tlsaddr = copy_rtx (addr);
8555 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8560 /* Place addr into TOC constant pool. */
8561 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8563 /* Output the TOC entry and create the MEM referencing the value. */
8564 if (constant_pool_expr_p (XEXP (sym, 0))
8565 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8567 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8568 mem = gen_const_mem (Pmode, tocref);
8569 set_mem_alias_set (mem, get_TOC_alias_set ());
8574 /* Use global-dynamic for local-dynamic. */
8575 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8576 || model == TLS_MODEL_LOCAL_DYNAMIC)
8578 /* Create new TOC reference for @m symbol. */
/* Build "*LCM<suffix>" from the pool symbol's name (skipping its
   first three characters, presumably "*LC" — TODO confirm).  */
8579 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8580 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8581 strcpy (tlsname, "*LCM");
8582 strcat (tlsname, name + 3);
8583 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8584 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8585 tocref = create_TOC_reference (modaddr, NULL_RTX);
8586 rtx modmem = gen_const_mem (Pmode, tocref);
8587 set_mem_alias_set (modmem, get_TOC_alias_set ());
8589 rtx modreg = gen_reg_rtx (Pmode);
8590 emit_insn (gen_rtx_SET (modreg, modmem));
8592 tmpreg = gen_reg_rtx (Pmode);
8593 emit_insn (gen_rtx_SET (tmpreg, mem));
8595 dest = gen_reg_rtx (Pmode);
8597 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8599 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8602 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8603 else if (TARGET_32BIT)
8605 tlsreg = gen_reg_rtx (SImode);
8606 emit_insn (gen_tls_get_tpointer (tlsreg));
8609 tlsreg = gen_rtx_REG (DImode, 13);
8611 /* Load the TOC value into temporary register. */
8612 tmpreg = gen_reg_rtx (Pmode);
8613 emit_insn (gen_rtx_SET (tmpreg, mem));
8614 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8615 gen_rtx_MINUS (Pmode, addr, tlsreg));
8617 /* Add TOC symbol value to TLS pointer. */
8618 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8623 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8624 __tls_get_addr call. */
/* NOTE(review): some original lines elided; code unchanged.  Emits the
   addi/addis sequence loading the TLS argument register for TLSGD and
   TLSLD calls; medium/large code models need the @ha/@l pair.  */
8627 rs6000_output_tlsargs (rtx *operands)
8629 /* Set up operands for output_asm_insn, without modifying OPERANDS. */
8632 /* The set dest of the call, ie. r3, which is also the first arg reg. */
8633 op[0] = operands[0];
8634 /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
8635 op[1] = XVECEXP (operands[2], 0, 0);
8636 if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8638 /* The GOT register. */
8639 op[2] = XVECEXP (operands[2], 0, 1);
8640 if (TARGET_CMODEL != CMODEL_SMALL)
8641 output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8642 "addi %0,%0,%1@got@tlsgd@l", op);
8644 output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
8646 else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8648 if (TARGET_CMODEL != CMODEL_SMALL)
8649 output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8650 "addi %0,%0,%&@got@tlsld@l", op);
8652 output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
8658 /* Passes the tls arg value for global dynamic and local dynamic
8659 emit_library_call_value in rs6000_legitimize_tls_address to
8660 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8661 marker relocs put on __tls_get_addr calls. */
/* NOTE(review): set around the emit_library_call_value calls below and
   cleared immediately after — effectively a hidden extra argument.  */
8662 static rtx global_tlsarg;
8664 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8665 this (thread-local) address. */
/* NOTE(review): many original lines are elided in this listing; code is
   left byte-identical.  Handles LE (16/32-bit offsets), GD, LD and IE
   models for ELF targets; AIX is delegated up front.  */
8668 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8673 return rs6000_legitimize_tls_address_aix (addr, model);
8675 dest = gen_reg_rtx (Pmode);
/* Local-exec, 16-bit tprel: single insn off the thread pointer
   (r13 on 64-bit, r2 on 32-bit).  */
8676 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8682 tlsreg = gen_rtx_REG (Pmode, 13);
8683 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8687 tlsreg = gen_rtx_REG (Pmode, 2);
8688 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
/* Local-exec, 32-bit tprel: @ha then @l pair.  */
8692 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8696 tmp = gen_reg_rtx (Pmode);
8699 tlsreg = gen_rtx_REG (Pmode, 13);
8700 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8704 tlsreg = gen_rtx_REG (Pmode, 2);
8705 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8709 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8711 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8716 rtx got, tga, tmp1, tmp2;
8718 /* We currently use relocations like @got@tlsgd for tls, which
8719 means the linker will handle allocation of tls entries, placing
8720 them in the .got section. So use a pointer to the .got section,
8721 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8722 or to secondary GOT sections used by 32-bit -fPIC. */
8724 got = gen_rtx_REG (Pmode, 2);
8728 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8731 rtx gsym = rs6000_got_sym ();
8732 got = gen_reg_rtx (Pmode);
8734 rs6000_emit_move (got, gsym, Pmode);
/* Materialize the GOT pointer PC-relatively via the link register.  */
8739 tmp1 = gen_reg_rtx (Pmode);
8740 tmp2 = gen_reg_rtx (Pmode);
8741 mem = gen_const_mem (Pmode, tmp1);
8742 lab = gen_label_rtx ();
8743 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8744 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8745 if (TARGET_LINK_STACK)
8746 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8747 emit_move_insn (tmp2, mem);
8748 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8749 set_unique_reg_note (last, REG_EQUAL, gsym);
/* Global-dynamic: call __tls_get_addr with (addr, got).  */
8754 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8756 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8758 tga = rs6000_tls_get_addr ();
8759 global_tlsarg = arg;
8760 if (TARGET_TLS_MARKERS)
8762 rtx argreg = gen_rtx_REG (Pmode, 3);
8763 emit_insn (gen_rtx_SET (argreg, arg));
8764 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8768 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8769 global_tlsarg = NULL_RTX;
8771 /* Make a note so that the result of this call can be CSEd. */
8772 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8773 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8774 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
/* Local-dynamic: one __tls_get_addr call for the module, then add the
   per-symbol dtprel offset.  */
8776 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8778 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8779 tga = rs6000_tls_get_addr ();
8780 tmp1 = gen_reg_rtx (Pmode);
8781 global_tlsarg = arg;
8782 if (TARGET_TLS_MARKERS)
8784 rtx argreg = gen_rtx_REG (Pmode, 3);
8785 emit_insn (gen_rtx_SET (argreg, arg));
8786 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8790 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8791 global_tlsarg = NULL_RTX;
8793 /* Make a note so that the result of this call can be CSEd. */
8794 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8795 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8796 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8798 if (rs6000_tls_size == 16)
8801 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8803 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8805 else if (rs6000_tls_size == 32)
8807 tmp2 = gen_reg_rtx (Pmode);
8809 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8811 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8814 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8816 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8820 tmp2 = gen_reg_rtx (Pmode);
8822 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8824 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8826 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8832 /* IE, or 64-bit offset LE. */
8833 tmp2 = gen_reg_rtx (Pmode);
8835 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8837 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8840 insn = gen_tls_tls_64 (dest, tmp2, addr);
8842 insn = gen_tls_tls_32 (dest, tmp2, addr);
8850 /* Only create the global variable for the stack protect guard if we are using
8851 the global flavor of that guard. */
8853 rs6000_init_stack_protect_guard (void)
8855 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8856 return default_stack_protect_guard ();
8861 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* NOTE(review): rejects (high (unspec ...)), TLS symbol + offset sums,
   and (for ELF) anything TLS-referencing, from the constant pool.  */
8864 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8866 if (GET_CODE (x) == HIGH
8867 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8870 /* A TLS symbol in the TOC cannot contain a sum. */
8871 if (GET_CODE (x) == CONST
8872 && GET_CODE (XEXP (x, 0)) == PLUS
8873 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8874 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8877 /* Do not place an ELF TLS symbol in the constant pool. */
8878 return TARGET_ELF && tls_referenced_p (x);
8881 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8882 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8883 can be addressed relative to the toc pointer. */
8886 use_toc_relative_ref (rtx sym, machine_mode mode)
8888 return ((constant_pool_expr_p (sym)
8889 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8890 get_pool_mode (sym)))
8891 || (TARGET_CMODEL == CMODEL_MEDIUM
8892 && SYMBOL_REF_LOCAL_P (sym)
8893 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8896 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8897 that is a valid memory address for an instruction.
8898 The MODE argument is the machine mode for the MEM expression
8899 that wants to use this address.
8901 On the RS/6000, there are four valid address: a SYMBOL_REF that
8902 refers to a constant pool entry of an address (or the sum of it
8903 plus a constant), a short (16-bit signed) constant plus a register,
8904 the sum of two registers, or a register indirect, possibly with an
8905 auto-increment. For DFmode, DDmode and DImode with a constant plus
8906 register, we must ensure that both words are addressable or PowerPC64
8907 with offset word aligned.
8909 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8910 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8911 because adjacent memory cells are accessed by adding word-sized offsets
8912 during assembly output. */
/* NOTE(review): several original lines (return statements etc.) are
   elided in this listing; code left byte-identical.  */
8914 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8916 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8917 bool quad_offset_p = mode_supports_dq_form (mode);
8919 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8920 if (VECTOR_MEM_ALTIVEC_P (mode)
8921 && GET_CODE (x) == AND
8922 && CONST_INT_P (XEXP (x, 1))
8923 && INTVAL (XEXP (x, 1)) == -16)
8926 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8928 if (legitimate_indirect_address_p (x, reg_ok_strict))
8931 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8932 && mode_supports_pre_incdec_p (mode)
8933 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8935 /* Handle restricted vector d-form offsets in ISA 3.0. */
8938 if (quad_address_p (x, mode, reg_ok_strict))
8941 else if (virtual_stack_registers_memory_p (x))
8944 else if (reg_offset_p)
8946 if (legitimate_small_data_p (mode, x))
8948 if (legitimate_constant_pool_address_p (x, mode,
8949 reg_ok_strict || lra_in_progress))
8953 /* For TImode, if we have TImode in VSX registers, only allow register
8954 indirect addresses. This will allow the values to go in either GPRs
8955 or VSX registers without reloading. The vector types would tend to
8956 go into VSX registers, so we allow REG+REG, while TImode seems
8957 somewhat split, in that some uses are GPR based, and some VSX based. */
8958 /* FIXME: We could loosen this by changing the following to
8959 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8960 but currently we cannot allow REG+REG addressing for TImode. See
8961 PR72827 for complete details on how this ends up hoodwinking DSE. */
8962 if (mode == TImode && TARGET_VSX)
8964 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8967 && GET_CODE (x) == PLUS
8968 && REG_P (XEXP (x, 0))
8969 && (XEXP (x, 0) == virtual_stack_vars_rtx
8970 || XEXP (x, 0) == arg_pointer_rtx)
8971 && CONST_INT_P (XEXP (x, 1)))
8973 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
/* Indexed (reg+reg) form, subject to mode/register constraints.  */
8975 if (!FLOAT128_2REG_P (mode)
8976 && (TARGET_HARD_FLOAT
8978 || (mode != DFmode && mode != DDmode))
8979 && (TARGET_POWERPC64 || mode != DImode)
8980 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8982 && !avoiding_indexed_address_p (mode)
8983 && legitimate_indexed_address_p (x, reg_ok_strict))
/* PRE_MODIFY: address must also match its own update expression.  */
8985 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8986 && mode_supports_pre_modify_p (mode)
8987 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8988 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8989 reg_ok_strict, false)
8990 || (!avoiding_indexed_address_p (mode)
8991 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8992 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8994 if (reg_offset_p && !quad_offset_p
8995 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9000 /* Debug version of rs6000_legitimate_address_p. */
/* NOTE(review): wrapper that logs the verdict and inputs to stderr.  */
9002 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9005 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9007 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9008 "strict = %d, reload = %s, code = %s\n",
9009 ret ? "true" : "false",
9010 GET_MODE_NAME (mode),
9012 (reload_completed ? "after" : "before"),
9013 GET_RTX_NAME (GET_CODE (x)));
9019 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
/* Dispatch through a function pointer so the debug variant can be
   swapped in.  */
9022 rs6000_mode_dependent_address_p (const_rtx addr,
9023 addr_space_t as ATTRIBUTE_UNUSED)
9025 return rs6000_mode_dependent_address_ptr (addr);
9028 /* Go to LABEL if ADDR (a legitimate address expression)
9029 has an effect that depends on the machine mode it is used for.
9031 On the RS/6000 this is true of all integral offsets (since AltiVec
9032 and VSX modes don't allow them) or is a pre-increment or decrement.
9034 ??? Except that due to conceptual problems in offsettable_address_p
9035 we can't really report the problems of integral offsets. So leave
9036 this assuming that the adjustable offset must be valid for the
9037 sub-words of a TFmode operand, which is what we had before. */
/* NOTE(review): switch case labels are partially elided in this
   listing; code left byte-identical.  */
9040 rs6000_mode_dependent_address (const_rtx addr)
9042 switch (GET_CODE (addr))
9045 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9046 is considered a legitimate address before reload, so there
9047 are no offset restrictions in that case. Note that this
9048 condition is safe in strict mode because any address involving
9049 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9050 been rejected as illegitimate. */
9051 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9052 && XEXP (addr, 0) != arg_pointer_rtx
9053 && CONST_INT_P (XEXP (addr, 1)))
9055 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
/* Mode-dependent if the offset is near the top of the 16-bit
   signed range (slack differs for 64- vs 32-bit).  */
9056 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9061 /* Anything in the constant pool is sufficiently aligned that
9062 all bytes have the same high part address. */
9063 return !legitimate_constant_pool_address_p (addr, QImode, false);
9065 /* Auto-increment cases are now treated generically in recog.c. */
9067 return TARGET_UPDATE;
9069 /* AND is only allowed in Altivec loads. */
9080 /* Debug version of rs6000_mode_dependent_address. */
9082 rs6000_debug_mode_dependent_address (const_rtx addr)
9084 bool ret = rs6000_mode_dependent_address (addr);
9086 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9087 ret ? "true" : "false");
9093 /* Implement FIND_BASE_TERM. */
/* NOTE(review): some lines elided (the binding of BASE from OP among
   them); peels CONST/PLUS wrappers and unwraps TOC-rel / Mach-O PIC
   unspecs to expose the underlying symbol for alias analysis.  */
9096 rs6000_find_base_term (rtx op)
9101 if (GET_CODE (base) == CONST)
9102 base = XEXP (base, 0);
9103 if (GET_CODE (base) == PLUS)
9104 base = XEXP (base, 0);
9105 if (GET_CODE (base) == UNSPEC)
9106 switch (XINT (base, 1))
9109 case UNSPEC_MACHOPIC_OFFSET:
9110 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9111 for aliasing purposes. */
9112 return XVECEXP (base, 0, 0);
9118 /* More elaborate version of recog's offsettable_memref_p predicate
9119 that works around the ??? note of rs6000_mode_dependent_address.
9120 In particular it accepts
9122 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9124 in 32-bit mode, that the recog predicate rejects. */
9127 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9134 /* First mimic offsettable_memref_p. */
9135 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9138 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9139 the latter predicate knows nothing about the mode of the memory
9140 reference and, therefore, assumes that it is the largest supported
9141 mode (TFmode). As a consequence, legitimate offsettable memory
9142 references are rejected. rs6000_legitimate_offset_address_p contains
9143 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9144 at least with a little bit of help here given that we know the
9145 actual registers used. */
9146 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9147 || GET_MODE_SIZE (reg_mode) == 4);
9148 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9149 strict, worst_case);
9152 /* Determine the reassociation width to be used in reassociate_bb.
9153 This takes into account how many parallel operations we
9154 can actually do of a given type, and also the latency.
9158 vect add/sub/mul 2/cycle
9159 fp add/sub/mul 2/cycle
   NOTE(review): the returned width values per mode class are elided in
   this chunk; only the processor/mode dispatch structure is visible.  */
9164 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9167 switch (rs6000_tune)
9169 case PROCESSOR_POWER8:
9170 case PROCESSOR_POWER9:
9171 if (DECIMAL_FLOAT_MODE_P (mode))
9173 if (VECTOR_MODE_P (mode))
9175 if (INTEGRAL_MODE_P (mode))
9177 if (FLOAT_MODE_P (mode))
9186 /* Change register usage conditional on target flags. */
/* Implements TARGET_CONDITIONAL_REGISTER_USAGE: adjust the global
   fixed_regs / call_used_regs / call_really_used_regs arrays according
   to the ABI and enabled features.  */
9188 rs6000_conditional_register_usage (void)
9192 if (TARGET_DEBUG_TARGET)
9193 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9195 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9197 fixed_regs[13] = call_used_regs[13]
9198 = call_really_used_regs[13] = 1;
9200 /* Conditionally disable FPRs. */
9201 if (TARGET_SOFT_FLOAT)
9202 for (i = 32; i < 64; i++)
9203 fixed_regs[i] = call_used_regs[i]
9204 = call_really_used_regs[i] = 1;
9206 /* The TOC register is not killed across calls in a way that is
9207 visible to the compiler. */
9208 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9209 call_really_used_regs[2] = 0;
/* -fPIC (flag_pic == 2) pins the PIC offset table register; -fpic
   (flag_pic == 1) additionally marks it call-used.  */
9211 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9212 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9214 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9215 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9216 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9217 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9219 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9220 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9221 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9222 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9224 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9225 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9226 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
/* Without AltiVec/VSX the vector registers and VRSAVE are unusable.  */
9228 if (!TARGET_ALTIVEC && !TARGET_VSX)
9230 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9231 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9232 call_really_used_regs[VRSAVE_REGNO] = 1;
9235 if (TARGET_ALTIVEC || TARGET_VSX)
9236 global_regs[VSCR_REGNO] = 1;
9238 if (TARGET_ALTIVEC_ABI)
9240 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9241 call_used_regs[i] = call_really_used_regs[i] = 1;
9243 /* AIX reserves VR20:31 in non-extended ABI mode. */
9245 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9246 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9251 /* Output insns to set DEST equal to the constant SOURCE as a series of
9252 lis, ori and shl instructions and return TRUE. */
9255 rs6000_emit_set_const (rtx dest, rtx source)
9257 machine_mode mode = GET_MODE (dest);
9262 gcc_checking_assert (CONST_INT_P (source));
9263 c = INTVAL (source);
/* NOTE(review): the switch over MODE is elided here; the fragments
   below appear to be the QI/HI (direct set), SI (lis+ori pair) and
   DI cases respectively -- confirm against the full file.  */
9268 emit_insn (gen_rtx_SET (dest, source));
/* SImode: load the high 16 bits with lis, then OR in the low 16.
   Before RA we use a fresh pseudo for the intermediate value.  */
9272 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9274 emit_insn (gen_rtx_SET (copy_rtx (temp),
9275 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9276 emit_insn (gen_rtx_SET (dest,
9277 gen_rtx_IOR (SImode, copy_rtx (temp),
9278 GEN_INT (c & 0xffff))));
/* 32-bit target: build a DImode constant as two SImode halves.  */
9282 if (!TARGET_POWERPC64)
9286 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9288 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9290 emit_move_insn (hi, GEN_INT (c >> 32));
/* Sign-extend the low word so GEN_INT receives a canonical value.  */
9291 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9292 emit_move_insn (lo, GEN_INT (c));
9295 rs6000_emit_set_long_const (dest, c);
/* Tag the final insn with a REG_EQUAL note so later passes know the
   register holds constant C.  */
9302 insn = get_last_insn ();
9303 set = single_set (insn);
9304 if (! CONSTANT_P (SET_SRC (set)))
9305 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9310 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9311 Output insns to set DEST equal to the constant C as a series of
9312 lis, ori and shl instructions.
   UD1..UD4 below are the four 16-bit chunks of C, UD1 least
   significant; each branch handles progressively wider constants.  */
9315 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9318 HOST_WIDE_INT ud1, ud2, ud3, ud4;
/* Case 1: C fits in a sign-extended 16-bit immediate (li).  */
9328 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9329 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9330 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
/* Case 2: C fits in a sign-extended 32-bit value (lis + optional ori).  */
9332 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9333 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9335 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9337 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9338 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9340 emit_move_insn (dest,
9341 gen_rtx_IOR (DImode, copy_rtx (temp),
/* Case 3: only the low 32 bits are set but bit 31 is on, so build the
   value signed and zero-extend the SImode lowpart to clear the top.  */
9344 else if (ud3 == 0 && ud4 == 0)
9346 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9348 gcc_assert (ud2 & 0x8000);
9349 emit_move_insn (copy_rtx (temp),
9350 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9352 emit_move_insn (copy_rtx (temp),
9353 gen_rtx_IOR (DImode, copy_rtx (temp),
9355 emit_move_insn (dest,
9356 gen_rtx_ZERO_EXTEND (DImode,
9357 gen_lowpart (SImode,
/* Case 4: C fits in a sign-extended 48-bit value: build the upper
   32 bits, shift left 16, OR in the remaining chunk.  */
9360 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9361 || (ud4 == 0 && ! (ud3 & 0x8000)))
9363 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9365 emit_move_insn (copy_rtx (temp),
9366 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9368 emit_move_insn (copy_rtx (temp),
9369 gen_rtx_IOR (DImode, copy_rtx (temp),
9371 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9372 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9375 emit_move_insn (dest,
9376 gen_rtx_IOR (DImode, copy_rtx (temp),
/* General case: full 64-bit constant -- lis/ori for the high 32 bits,
   shift left 32, then ORIS/ORI for the low two chunks.  */
9381 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9383 emit_move_insn (copy_rtx (temp),
9384 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9386 emit_move_insn (copy_rtx (temp),
9387 gen_rtx_IOR (DImode, copy_rtx (temp),
9390 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9391 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9394 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9395 gen_rtx_IOR (DImode, copy_rtx (temp),
9396 GEN_INT (ud2 << 16)));
9398 emit_move_insn (dest,
9399 gen_rtx_IOR (DImode, copy_rtx (temp),
9404 /* Helper for the following. Get rid of [r+r] memory refs
9405 in cases where it won't work (TImode, TFmode, TDmode, PTImode).
   Rewrites either operand's address into a single base register via
   copy_addr_to_reg; constant-pool addresses are left alone.  */
9408 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9410 if (MEM_P (operands[0])
9411 && !REG_P (XEXP (operands[0], 0))
9412 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9413 GET_MODE (operands[0]), false))
9415 = replace_equiv_address (operands[0],
9416 copy_addr_to_reg (XEXP (operands[0], 0)));
9418 if (MEM_P (operands[1])
9419 && !REG_P (XEXP (operands[1], 0))
9420 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9421 GET_MODE (operands[1]), false))
9423 = replace_equiv_address (operands[1],
9424 copy_addr_to_reg (XEXP (operands[1], 0)));
9427 /* Generate a vector of constants to permute MODE for a little-endian
9428 storage operation by swapping the two halves of a vector.
   Returns an rtvec of DImode CONST_INTs [n/2..n-1, 0..n/2-1].  */
9430 rs6000_const_vec (machine_mode mode)
9458 v = rtvec_alloc (subparts);
9460 for (i = 0; i < subparts / 2; ++i)
9461 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9462 for (i = subparts / 2; i < subparts; ++i)
9463 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9468 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
   store from SOURCE to DEST in mode MODE, swapping the doubleword
   halves as required on little-endian.  */
9471 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9473 /* Scalar permutations are easier to express in integer modes rather than
9474 floating-point modes, so cast them here. We use V1TImode instead
9475 of TImode to ensure that the values don't go through GPRs. */
9476 if (FLOAT128_VECTOR_P (mode))
9478 dest = gen_lowpart (V1TImode, dest);
9479 source = gen_lowpart (V1TImode, source);
9483 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9485 element.  A 64-bit rotate swaps the two halves of a 128-bit value.  */
9486 if (mode == TImode || mode == V1TImode)
9486 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
/* Otherwise express the swap as a VEC_SELECT with the half-swapping
   permutation from rs6000_const_vec.  */
9490 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9491 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9495 /* Emit a little-endian load from vector memory location SOURCE to VSX
9496 register DEST in mode MODE. The load is done with two permuting
9497 insn's that represent an lxvd2x and xxpermdi. */
9499 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9501 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9503 V1TImode).  */
9503 if (mode == TImode || mode == V1TImode)
9506 dest = gen_lowpart (V2DImode, dest);
9507 source = adjust_address (source, V2DImode, 0);
/* Two successive half-swaps: the first models lxvd2x, the second the
   xxpermdi that restores element order.  */
9510 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9511 rs6000_emit_le_vsx_permute (tmp, source, mode);
9512 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9515 /* Emit a little-endian store to vector memory location DEST from VSX
9516 register SOURCE in mode MODE. The store is done with two permuting
9517 insn's that represent an xxpermdi and an stxvd2x. */
9519 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9521 /* This should never be called during or after LRA, because it does
9522 not re-permute the source register. It is intended only for use
9524 during expand.  */
9524 gcc_assert (!lra_in_progress && !reload_completed);
9526 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9528 V1TImode).  */
9528 if (mode == TImode || mode == V1TImode)
9531 dest = adjust_address (dest, V2DImode, 0);
9532 source = gen_lowpart (V2DImode, source);
/* Mirror of the load path: xxpermdi into TMP, then the stxvd2x-style
   permuting store.  */
9535 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9536 rs6000_emit_le_vsx_permute (tmp, source, mode);
9537 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9540 /* Emit a sequence representing a little-endian VSX load or store,
9541 moving data from SOURCE to DEST in mode MODE. This is done
9542 separately from rs6000_emit_move to ensure it is called only
9543 during expand. LE VSX loads and stores introduced later are
9544 handled with a split. The expand-time RTL generation allows
9545 us to optimize away redundant pairs of register-permutes. */
9547 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
/* Exactly one of DEST/SOURCE is memory (the XOR below), and neither
   side may be a GPR; P9 has native LE vector insns so is excluded.  */
9549 gcc_assert (!BYTES_BIG_ENDIAN
9550 && VECTOR_MEM_VSX_P (mode)
9551 && !TARGET_P9_VECTOR
9552 && !gpr_or_gpr_p (dest, source)
9553 && (MEM_P (source) ^ MEM_P (dest)));
9557 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9558 rs6000_emit_le_vsx_load (dest, source, mode);
9562 if (!REG_P (source))
9563 source = force_reg (mode, source);
9564 rs6000_emit_le_vsx_store (dest, source, mode);
9568 /* Return whether a SFmode or SImode move can be done without converting one
9569 mode to another. This arises when we have:
9571 (SUBREG:SF (REG:SI ...))
9572 (SUBREG:SI (REG:SF ...))
9574 and one of the values is in a floating point/vector register, where SFmode
9575 scalars are stored in DFmode format. */
9578 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
/* -mallow-sf-subreg disables the special handling entirely.  */
9580 if (TARGET_ALLOW_SF_SUBREG)
9583 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9586 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9589 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9590 if (SUBREG_P (dest))
9592 rtx dest_subreg = SUBREG_REG (dest);
9593 rtx src_subreg = SUBREG_REG (src);
9594 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9601 /* Helper function to change moves with:
9603 (SUBREG:SF (REG:SI)) and
9604 (SUBREG:SI (REG:SF))
9606 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9607 values are stored as DFmode values in the VSX registers. We need to convert
9608 the bits before we can use a direct move or operate on the bits in the
9609 vector register as an integer type.
9611 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).
   Returns true when a converting move was emitted, false to fall
   through to the normal move path.  */
9614 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9616 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9617 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9618 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9620 rtx inner_source = SUBREG_REG (source);
9621 machine_mode inner_mode = GET_MODE (inner_source);
9623 if (mode == SImode && inner_mode == SFmode)
9625 emit_insn (gen_movsi_from_sf (dest, inner_source));
9629 if (mode == SFmode && inner_mode == SImode)
9631 emit_insn (gen_movsf_from_si (dest, inner_source));
9639 /* Emit a move from SOURCE to DEST in mode MODE.  This is the main
   move expander: it legitimizes constants, TLS references, TOC
   references, SDmode spills under LRA, and little-endian VSX moves
   before emitting the final SET.  Large parts of the body are elided
   in this chunk.  */
9641 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9645 operands[1] = source;
9647 if (TARGET_DEBUG_ADDR)
9650 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9651 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9652 GET_MODE_NAME (mode),
9655 can_create_pseudo_p ());
9657 fprintf (stderr, "source:\n");
9661 /* Check that we get CONST_WIDE_INT only when we should. */
9662 if (CONST_WIDE_INT_P (operands[1])
9663 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9666 #ifdef HAVE_AS_GNU_ATTRIBUTE
9667 /* If we use a long double type, set the flags in .gnu_attribute that say
9668 what the long double type is. This is to allow the linker's warning
9669 message for the wrong long double to be useful, even if the function does
9670 not do a call (for example, doing a 128-bit add on power9 if the long
9671 double type is IEEE 128-bit. Do not set this if __ibm128 or __float128 are
9672 used if they aren't the default long double type. */
9673 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9675 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9676 rs6000_passes_float = rs6000_passes_long_double = true;
9678 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9679 rs6000_passes_float = rs6000_passes_long_double = true;
9683 /* See if we need to special case SImode/SFmode SUBREG moves. */
9684 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9685 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9688 /* Check if GCC is setting up a block move that will end up using FP
9689 registers as temporaries. We must make sure this is acceptable. */
9690 if (MEM_P (operands[0])
9691 && MEM_P (operands[1])
9693 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9694 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9695 && ! (rs6000_slow_unaligned_access (SImode,
9696 (MEM_ALIGN (operands[0]) > 32
9697 ? 32 : MEM_ALIGN (operands[0])))
9698 || rs6000_slow_unaligned_access (SImode,
9699 (MEM_ALIGN (operands[1]) > 32
9700 ? 32 : MEM_ALIGN (operands[1]))))
9701 && ! MEM_VOLATILE_P (operands [0])
9702 && ! MEM_VOLATILE_P (operands [1]))
/* Split the 8-byte mem-to-mem copy into two SImode copies.  */
9704 emit_move_insn (adjust_address (operands[0], SImode, 0),
9705 adjust_address (operands[1], SImode, 0));
9706 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9707 adjust_address (copy_rtx (operands[1]), SImode, 4));
9711 if (can_create_pseudo_p () && MEM_P (operands[0])
9712 && !gpc_reg_operand (operands[1], mode))
9713 operands[1] = force_reg (mode, operands[1]);
9715 /* Recognize the case where operand[1] is a reference to thread-local
9716 data and load its address to a register. */
9717 if (tls_referenced_p (operands[1]))
9719 enum tls_model model;
9720 rtx tmp = operands[1];
/* Split off a constant addend so the bare SYMBOL_REF can be
   legitimized, then re-add the addend afterwards.  */
9723 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9725 addend = XEXP (XEXP (tmp, 0), 1);
9726 tmp = XEXP (XEXP (tmp, 0), 0);
9729 gcc_assert (SYMBOL_REF_P (tmp));
9730 model = SYMBOL_REF_TLS_MODEL (tmp);
9731 gcc_assert (model != 0);
9733 tmp = rs6000_legitimize_tls_address (tmp, model);
9736 tmp = gen_rtx_PLUS (mode, tmp, addend);
9737 tmp = force_operand (tmp, operands[0]);
9742 /* 128-bit constant floating-point values on Darwin should really be loaded
9743 as two parts. However, this premature splitting is a problem when DFmode
9744 values can go into Altivec registers. */
9745 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9746 && !reg_addr[DFmode].scalar_in_vmx_p)
9748 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9749 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9751 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9752 GET_MODE_SIZE (DFmode)),
9753 simplify_gen_subreg (DFmode, operands[1], mode,
9754 GET_MODE_SIZE (DFmode)),
9759 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9760 p1:SD) if p1 is not of floating point class and p0 is spilled as
9761 we can have no analogous movsd_store for this. */
9762 if (lra_in_progress && mode == DDmode
9763 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9764 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9765 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9766 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9769 int regno = REGNO (SUBREG_REG (operands[1]));
9771 if (!HARD_REGISTER_NUM_P (regno))
9773 cl = reg_preferred_class (regno);
9774 regno = reg_renumber[regno];
9776 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9778 if (regno >= 0 && ! FP_REGNO_P (regno))
9781 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9782 operands[1] = SUBREG_REG (operands[1]);
/* Spilled-SD store case: if the source will land in an FP register use
   movsd_store, if in a GPR use movsd_hardfloat.  */
9787 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9788 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9789 && (REG_P (operands[1])
9790 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9792 int regno = reg_or_subregno (operands[1]);
9795 if (!HARD_REGISTER_NUM_P (regno))
9797 cl = reg_preferred_class (regno);
9798 gcc_assert (cl != NO_REGS);
9799 regno = reg_renumber[regno];
9801 regno = ira_class_hard_regs[cl][0];
9803 if (FP_REGNO_P (regno))
9805 if (GET_MODE (operands[0]) != DDmode)
9806 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9807 emit_insn (gen_movsd_store (operands[0], operands[1]));
9809 else if (INT_REGNO_P (regno))
9810 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9815 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9816 p:DD)) if p0 is not of floating point class and p1 is spilled as
9817 we can have no analogous movsd_load for this. */
9818 if (lra_in_progress && mode == DDmode
9819 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9820 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9821 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9822 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9825 int regno = REGNO (SUBREG_REG (operands[0]));
9827 if (!HARD_REGISTER_NUM_P (regno))
9829 cl = reg_preferred_class (regno);
9830 regno = reg_renumber[regno];
9832 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9834 if (regno >= 0 && ! FP_REGNO_P (regno))
9837 operands[0] = SUBREG_REG (operands[0]);
9838 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
/* Spilled-SD load case, mirroring the store case above.  */
9843 && (REG_P (operands[0])
9844 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9845 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9846 && reg_preferred_class (REGNO (operands[1])) == NO_REGS
9848 int regno = reg_or_subregno (operands[0]);
9851 if (!HARD_REGISTER_NUM_P (regno))
9853 cl = reg_preferred_class (regno);
9854 gcc_assert (cl != NO_REGS);
9855 regno = reg_renumber[regno];
9857 regno = ira_class_hard_regs[cl][0];
9859 if (FP_REGNO_P (regno))
9861 if (GET_MODE (operands[1]) != DDmode)
9862 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9863 emit_insn (gen_movsd_load (operands[0], operands[1]));
9865 else if (INT_REGNO_P (regno))
9866 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9872 /* FIXME: In the long term, this switch statement should go away
9873 and be replaced by a sequence of tests based on things like
9874 GET_MODE_CLASS / GET_MODE_SIZE.  The fragments below are the
   per-mode cases of a switch on MODE (mostly elided here).  */
9879 if (CONSTANT_P (operands[1])
9880 && !CONST_INT_P (operands[1]))
9881 operands[1] = force_const_mem (mode, operands[1]);
9888 if (FLOAT128_2REG_P (mode))
9889 rs6000_eliminate_indexed_memrefs (operands);
/* FP constants that are not "easy" must come from memory.  */
9896 if (CONSTANT_P (operands[1])
9897 && ! easy_fp_constant (operands[1], mode))
9898 operands[1] = force_const_mem (mode, operands[1]);
/* Likewise vector constants that have no cheap splat form.  */
9908 if (CONSTANT_P (operands[1])
9909 && !easy_vector_constant (operands[1], mode))
9910 operands[1] = force_const_mem (mode, operands[1]);
9915 /* Use default pattern for address of ELF small data */
9918 && DEFAULT_ABI == ABI_V4
9919 && (SYMBOL_REF_P (operands[1])
9920 || GET_CODE (operands[1]) == CONST)
9921 && small_data_operand (operands[1], mode))
9923 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9927 if (DEFAULT_ABI == ABI_V4
9928 && mode == Pmode && mode == SImode
9929 && flag_pic == 1 && got_operand (operands[1], mode))
9931 emit_insn (gen_movsi_got (operands[0], operands[1]));
9935 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9939 && CONSTANT_P (operands[1])
9940 && GET_CODE (operands[1]) != HIGH
9941 && !CONST_INT_P (operands[1]))
9943 rtx target = (!can_create_pseudo_p ()
9945 : gen_reg_rtx (mode));
9947 /* If this is a function address on -mcall-aixdesc,
9948 convert it to the address of the descriptor. */
9949 if (DEFAULT_ABI == ABI_AIX
9950 && SYMBOL_REF_P (operands[1])
9951 && XSTR (operands[1], 0)[0] == '.')
9953 const char *name = XSTR (operands[1], 0);
9955 while (*name == '.')
/* Build a fresh SYMBOL_REF without the leading dots, preserving all
   flags and associated data of the original.  */
9957 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9958 CONSTANT_POOL_ADDRESS_P (new_ref)
9959 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9960 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9961 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9962 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9963 operands[1] = new_ref;
9966 if (DEFAULT_ABI == ABI_DARWIN)
9969 if (MACHO_DYNAMIC_NO_PIC_P)
9971 /* Take care of any required data indirection. */
9972 operands[1] = rs6000_machopic_legitimize_pic_address (
9973 operands[1], mode, operands[0]);
9974 if (operands[0] != operands[1])
9975 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9979 emit_insn (gen_macho_high (target, operands[1]));
9980 emit_insn (gen_macho_low (operands[0], target, operands[1]));
/* ELF: materialize the address with an @ha / @l pair.  */
9984 emit_insn (gen_elf_high (target, operands[1]));
9985 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9989 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9990 and we have put it in the TOC, we just need to make a TOC-relative
9991 reference to it.  */
9993 && SYMBOL_REF_P (operands[1])
9994 && use_toc_relative_ref (operands[1], mode)
9995 operands[1] = create_TOC_reference (operands[1], operands[0]);
9996 else if (mode == Pmode
9997 && CONSTANT_P (operands[1])
9998 && GET_CODE (operands[1]) != HIGH
9999 && ((REG_P (operands[0])
10000 && FP_REGNO_P (REGNO (operands[0])))
10001 || !CONST_INT_P (operands[1])
10002 || (num_insns_constant (operands[1], mode)
10003 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10004 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10005 && (TARGET_CMODEL == CMODEL_SMALL
10006 || can_create_pseudo_p ()
10007 || (REG_P (operands[0])
10008 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10012 /* Darwin uses a special PIC legitimizer. */
10013 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10016 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10018 if (operands[0] != operands[1])
10019 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10024 /* If we are to limit the number of things we put in the TOC and
10025 this is a symbol plus a constant we can add in one insn,
10026 just put the symbol in the TOC and add the constant. */
10027 if (GET_CODE (operands[1]) == CONST
10028 && TARGET_NO_SUM_IN_TOC
10029 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10030 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10031 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10032 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10033 && ! side_effects_p (operands[0]))
10036 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10037 rtx other = XEXP (XEXP (operands[1], 0), 1);
10039 sym = force_reg (mode, sym);
10040 emit_insn (gen_add3_insn (operands[0], sym, other));
10044 operands[1] = force_const_mem (mode, operands[1]);
/* If the constant-pool slot itself has a TOC entry, reference it
   through the TOC and mark it with the TOC alias set.  */
10047 && SYMBOL_REF_P (XEXP (operands[1], 0))
10048 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10050 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10052 operands[1] = gen_const_mem (mode, tocref);
10053 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10059 if (!VECTOR_MEM_VSX_P (TImode))
10060 rs6000_eliminate_indexed_memrefs (operands);
10064 rs6000_eliminate_indexed_memrefs (operands);
10068 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10071 /* Above, we may have called force_const_mem which may have returned
10072 an invalid address. If we can, fix this up; otherwise, reload will
10073 have to deal with it. */
10074 if (MEM_P (operands[1]))
10075 operands[1] = validize_mem (operands[1]);
10077 emit_insn (gen_rtx_SET (operands[0], operands[1]));
/* Argument-passing predicates used by the calling-convention code below.
   CUM is the CUMULATIVE_ARGS state; comments cannot be placed inside the
   backslash-continued macro bodies.  */
10080 /* Nonzero if we can use a floating-point register to pass this arg. */
10081 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10082 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10083 && (CUM)->fregno <= FP_ARG_MAX_REG \
10084 && TARGET_HARD_FLOAT)
10086 /* Nonzero if we can use an AltiVec register to pass this arg. */
10087 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10088 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10089 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10090 && TARGET_ALTIVEC_ABI \
10093 /* Walk down the type tree of TYPE counting consecutive base elements.
10094 If *MODEP is VOIDmode, then set it to the first valid floating point
10095 or vector type. If a non-floating point or vector type is found, or
10096 if a floating point or vector type that doesn't match a non-VOIDmode
10097 *MODEP is found, then return -1, otherwise return the count in the
10098 sub-tree.  Used for ELFv2 homogeneous-aggregate classification.  */
10101 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10104 HOST_WIDE_INT size;
10106 switch (TREE_CODE (type))
/* Scalar floating-point leaf.  */
10109 mode = TYPE_MODE (type);
10110 if (!SCALAR_FLOAT_MODE_P (mode))
10113 if (*modep == VOIDmode)
10116 if (*modep == mode)
/* Complex: element mode must be scalar float; counts as two elements.  */
10122 mode = TYPE_MODE (TREE_TYPE (type));
10123 if (!SCALAR_FLOAT_MODE_P (mode))
10126 if (*modep == VOIDmode)
10129 if (*modep == mode)
/* Vector leaf: only valid under the AltiVec ABI.  */
10135 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10138 /* Use V4SImode as representative of all 128-bit vector types. */
10139 size = int_size_in_bytes (type);
10149 if (*modep == VOIDmode)
10152 /* Vector modes are considered to be opaque: two vectors are
10153 equivalent for the purposes of being homogeneous aggregates
10154 if they are the same size. */
10155 if (*modep == mode)
/* Array: recurse on the element type and multiply by element count.  */
10163 tree index = TYPE_DOMAIN (type);
10165 /* Can't handle incomplete types nor sizes that are not
10166 fixed.  */
10167 if (!COMPLETE_TYPE_P (type)
10168 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10171 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10174 || !TYPE_MAX_VALUE (index)
10175 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10176 || !TYPE_MIN_VALUE (index)
10177 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10181 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10182 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10184 /* There must be no padding. */
10185 if (wi::to_wide (TYPE_SIZE (type))
10186 != count * GET_MODE_BITSIZE (*modep))
/* Record: sum the counts of all FIELD_DECLs.  */
10198 /* Can't handle incomplete types nor sizes that are not
10199 fixed.  */
10200 if (!COMPLETE_TYPE_P (type)
10201 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10204 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10206 if (TREE_CODE (field) != FIELD_DECL)
10209 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10212 count += sub_count;
10215 /* There must be no padding. */
10216 if (wi::to_wide (TYPE_SIZE (type))
10217 != count * GET_MODE_BITSIZE (*modep))
/* Union: the count is the maximum over the members.  */
10224 case QUAL_UNION_TYPE:
10226 /* These aren't very interesting except in a degenerate case. */
10231 /* Can't handle incomplete types nor sizes that are not
10232 fixed.  */
10233 if (!COMPLETE_TYPE_P (type)
10234 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10237 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10239 if (TREE_CODE (field) != FIELD_DECL)
10242 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10245 count = count > sub_count ? count : sub_count;
10248 /* There must be no padding. */
10249 if (wi::to_wide (TYPE_SIZE (type))
10250 != count * GET_MODE_BITSIZE (*modep))
10263 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10264 float or vector aggregate that shall be passed in FP/vector registers
10265 according to the ELFv2 ABI, return the homogeneous element mode in
10266 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10268 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10271 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10272 machine_mode *elt_mode,
10275 /* Note that we do not accept complex types at the top level as
10276 homogeneous aggregates; these types are handled via the
10277 targetm.calls.split_complex_arg mechanism. Complex types
10278 can be elements of homogeneous aggregates, however. */
10279 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10280 && AGGREGATE_TYPE_P (type))
10282 machine_mode field_mode = VOIDmode;
10283 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10285 if (field_count > 0)
/* Each element occupies one vector register (16 bytes) or one
   FP register slot (8 bytes), rounded up.  */
10287 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10288 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10290 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10291 up to AGGR_ARG_NUM_REG registers. */
10292 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10295 *elt_mode = field_mode;
10297 *n_elts = field_count;
10310 /* Return a nonzero value to say to return the function value in
10311 memory, just as large structures are always returned. TYPE will be
10312 the data type of the value, and FNTYPE will be the type of the
10313 function doing the returning, or @code{NULL} for libcalls.
10315 The AIX ABI for the RS/6000 specifies that all structures are
10316 returned in memory. The Darwin ABI does the same.
10318 For the Darwin 64 Bit ABI, a function result can be returned in
10319 registers or in memory, depending on the size of the return data
10320 type. If it is returned in registers, the value occupies the same
10321 registers as it would if it were the first and only function
10322 argument. Otherwise, the function places its result in memory at
10323 the location pointed to by GPR3.
10325 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10326 but a draft put them in memory, and GCC used to implement the draft
10327 instead of the final standard. Therefore, aix_struct_return
10328 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10329 compatibility can change DRAFT_V4_STRUCT_RET to override the
10330 default, and -m switches get the final word. See
10331 rs6000_option_override_internal for more details.
10333 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10334 long double support is enabled. These values are returned in memory.
10336 int_size_in_bytes returns -1 for variable size objects, which go in
10337 memory always. The cast to unsigned makes -1 > 8. */
10340 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10342 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10344 && rs6000_darwin64_abi
10345 && TREE_CODE (type) == RECORD_TYPE
10346 && int_size_in_bytes (type) > 0)
10348 CUMULATIVE_ARGS valcum;
10352 valcum.fregno = FP_ARG_MIN_REG;
10353 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10354 /* Do a trial code generation as if this were going to be passed
10355 as an argument; if any part goes in memory, we return NULL. */
10356 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10359 /* Otherwise fall through to more conventional ABI rules. */
10362 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
10363 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10367 /* The ELFv2 ABI returns aggregates up to 16B in registers */
10368 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10369 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10372 if (AGGREGATE_TYPE_P (type)
10373 && (aix_struct_return
10374 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10377 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10378 modes only exist for GCC vector types if -maltivec. */
10379 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10380 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10383 /* Return synthetic vectors in memory. */
10384 if (TREE_CODE (type) == VECTOR_TYPE
10385 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10387 static bool warned_for_return_big_vectors = false;
10388 if (!warned_for_return_big_vectors)
/* Warn once per compilation, not per return site.  */
10390 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10391 "non-standard ABI extension with no compatibility "
10393 warned_for_return_big_vectors = true;
/* IEEE 128-bit long double is returned in memory under V.4.  */
10398 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10399 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10405 /* Specify whether values returned in registers should be at the most
10406 significant end of a register. We want aggregates returned by
10407 value to match the way aggregates are passed to functions. */
/* TARGET_RETURN_IN_MSB hook: true when a value of VALTYPE should sit at
   the most significant end of its return register — only for big-endian
   ELFv2 aggregates that are padded upward as arguments.  */
10410 rs6000_return_in_msb (const_tree valtype)
10412   return (DEFAULT_ABI == ABI_ELFv2
10413 	  && BYTES_BIG_ENDIAN
10414 	  && AGGREGATE_TYPE_P (valtype)
10415 	  && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10419 #ifdef HAVE_AS_GNU_ATTRIBUTE
10420 /* Return TRUE if a call to function FNDECL may be one that
10421 potentially affects the function calling ABI of the object file. */
/* Return TRUE if a call to FNDECL may affect the calling-convention
   GNU attributes we emit for the object file.  Only meaningful once the
   cgraph has reached the EXPANSION state and -mno-gnu-attr is not set.  */
10424 call_ABI_of_interest (tree fndecl)
10426   if (rs6000_gnu_attr && symtab->state == EXPANSION)
10428       struct cgraph_node *c_node;
10430       /* Libcalls are always interesting. */
10431       if (fndecl == NULL_TREE)
10434       /* Any call to an external function is interesting. */
10435       if (DECL_EXTERNAL (fndecl))
10438       /* Interesting functions that we are emitting in this object file. */
10439       c_node = cgraph_node::get (fndecl);
/* Look through aliases: a function only ever called directly cannot
   change the observable ABI of the object file.  */
10440       c_node = c_node->ultimate_alias_target ();
10441       return !c_node->only_called_directly_p ();
10447 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10448 for a call to a function whose data type is FNTYPE.
10449 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10451 For incoming args we set the number of arguments in the prototype large
10452 so we never return a PARALLEL. */
/* Initialize *CUM for a call to a function of type FNTYPE (NULL for
   libcalls, in which case RETURN_MODE gives the return mode).  INCOMING
   nonzero means we are setting up for the callee side.  Also records
   global flags (rs6000_passes_float etc.) used for GNU attributes.  */
10455 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10456 		      rtx libname ATTRIBUTE_UNUSED, int incoming,
10457 		      int libcall, int n_named_args,
10459 		      machine_mode return_mode ATTRIBUTE_UNUSED)
/* Zero everything first, then fill in the fields that differ.  */
10461   static CUMULATIVE_ARGS zero_cumulative;
10463   *cum = zero_cumulative;
10465   cum->fregno = FP_ARG_MIN_REG;
10466   cum->vregno = ALTIVEC_ARG_MIN_REG;
10467   cum->prototype = (fntype && prototype_p (fntype));
/* Under the SVR4 ABI, libcalls get the CALL_LIBCALL cookie.  */
10468   cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10469 		      ? CALL_LIBCALL : CALL_NORMAL);
10470   cum->sysv_gregno = GP_ARG_MIN_REG;
10471   cum->stdarg = stdarg_p (fntype);
10472   cum->libcall = libcall;
/* For incoming args the prototype count is made large (see block comment
   above this function) so a PARALLEL is never returned.  */
10474   cum->nargs_prototype = 0;
10475   if (incoming || cum->prototype)
10476     cum->nargs_prototype = n_named_args;
10478   /* Check for a longcall attribute. */
10479   if ((!fntype && rs6000_default_long_calls)
10481 	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10482 	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10483     cum->call_cookie |= CALL_LONG;
10484   else if (DEFAULT_ABI != ABI_DARWIN)
/* Locally-bound, non-weak, non-external functions never need the PLT.  */
10486       bool is_local = (fndecl
10487 		       && !DECL_EXTERNAL (fndecl)
10488 		       && !DECL_WEAK (fndecl)
10489 		       && (*targetm.binds_local_p) (fndecl));
10495 	      && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10496 	    cum->call_cookie |= CALL_LONG;
10501 	      && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10502 	    cum->call_cookie |= CALL_LONG;
/* Optional debug trace of the computed cumulative-arg state.  */
10506   if (TARGET_DEBUG_ARG)
10508       fprintf (stderr, "\ninit_cumulative_args:");
10511 	  tree ret_type = TREE_TYPE (fntype);
10512 	  fprintf (stderr, " ret code = %s,",
10513 		   get_tree_code_name (TREE_CODE (ret_type)));
10516       if (cum->call_cookie & CALL_LONG)
10517 	fprintf (stderr, " longcall,");
10519       fprintf (stderr, " proto = %d, nargs = %d\n",
10520 	       cum->prototype, cum->nargs_prototype);
/* Record facts about the return value for the .gnu.attribute sections
   (float/vector/long-double usage); assembled only when the assembler
   supports GNU attributes.  */
10523 #ifdef HAVE_AS_GNU_ATTRIBUTE
10524   if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10526       cum->escapes = call_ABI_of_interest (fndecl);
10533 	      return_type = TREE_TYPE (fntype);
10534 	      return_mode = TYPE_MODE (return_type);
/* For a libcall, synthesize a type from the return mode.  */
10537 	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10539 	  if (return_type != NULL)
10541 	      if (TREE_CODE (return_type) == RECORD_TYPE
10542 		  && TYPE_TRANSPARENT_AGGR (return_type))
10544 		  return_type = TREE_TYPE (first_field (return_type));
10545 		  return_mode = TYPE_MODE (return_type);
10547 	      if (AGGREGATE_TYPE_P (return_type)
10548 		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10550 		rs6000_returns_struct = true;
10552 	  if (SCALAR_FLOAT_MODE_P (return_mode))
10554 	      rs6000_passes_float = true;
10555 	      if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10556 		  && (FLOAT128_IBM_P (return_mode)
10557 		      || FLOAT128_IEEE_P (return_mode)
10558 		      || (return_type != NULL
10559 			  && (TYPE_MAIN_VARIANT (return_type)
10560 			      == long_double_type_node))))
10561 		rs6000_passes_long_double = true;
10563 	      /* Note if we passed or return a IEEE 128-bit type. We changed
10564 		 the mangling for these types, and we may need to make an alias
10565 		 with the old mangling. */
10566 	      if (FLOAT128_IEEE_P (return_mode))
10567 		rs6000_passes_ieee128 = true;
10569 	  if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10570 	    rs6000_passes_vector = true;
/* Returning a vector without AltiVec enabled is a hard error, since
   there is no register to return it in.  */
10577       && TARGET_ALTIVEC_ABI
10578       && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10580       error ("cannot return value in vector register because"
10581 	     " altivec instructions are disabled, use %qs"
10582 	     " to enable them", "-maltivec");
10586 /* The mode the ABI uses for a word. This is not the same as word_mode
10587 for -m32 -mpowerpc64. This is used to implement various target hooks. */
/* TARGET_ABI_WORD_MODE: the mode of an ABI word.  Differs from word_mode
   for -m32 -mpowerpc64 (see the comment above).  */
10589 static scalar_int_mode
10590 rs6000_abi_word_mode (void)
10592   return TARGET_32BIT ? SImode : DImode;
10595 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
/* TARGET_OFFLOAD_OPTIONS: tell the offload compiler which data-model ABI
   the host is using.  Caller owns (frees) the returned string.  */
10597 rs6000_offload_options (void)
10600     return xstrdup ("-foffload-abi=lp64");
10602   return xstrdup ("-foffload-abi=ilp32");
10605 /* On rs6000, function arguments are promoted, as are function return
/* TARGET_PROMOTE_FUNCTION_MODE: promote small-mode arguments and return
   values per PROMOTE_MODE (mutates MODE/ *PUNSIGNEDP in place).  */
10608 static machine_mode
10609 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10611 			      int *punsignedp ATTRIBUTE_UNUSED,
10614   PROMOTE_MODE (mode, *punsignedp, type);
10619 /* Return true if TYPE must be passed on the stack and not in registers. */
/* TARGET_MUST_PASS_IN_STACK: true if an argument of MODE/TYPE cannot go
   in registers.  AIX/ELFv2/64-bit use the variable-size test only; other
   ABIs also consider padding.  */
10622 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10624   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10625     return must_pass_in_stack_var_size (mode, type);
10627     return must_pass_in_stack_var_size_or_pad (mode, type);
/* True if MODE is complex IBM extended double (ICmode, or TCmode when
   TCmode is the IBM 128-bit format rather than IEEE).  */
10631 is_complex_IBM_long_double (machine_mode mode)
10633   return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10636 /* Whether ABI_V4 passes MODE args to a function in floating point
/* Whether the SVR4 (ABI_V4) calling convention passes an argument of
   MODE in a floating-point register.  NAMED distinguishes named args
   (SFmode goes in FPRs only when named).  */
10640 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10642   if (!TARGET_HARD_FLOAT)
10644   if (mode == DFmode)
10646   if (mode == SFmode && named)
10648   /* ABI_V4 passes complex IBM long double in 8 gprs.
10649      Stupid, but we can't change the ABI now. */
10650   if (is_complex_IBM_long_double (mode))
10652   if (FLOAT128_2REG_P (mode))
10654   if (DECIMAL_FLOAT_MODE_P (mode))
10659 /* Implement TARGET_FUNCTION_ARG_PADDING.
10661 For the AIX ABI structs are always stored left shifted in their
/* TARGET_FUNCTION_ARG_PADDING: which end of its slot an argument of
   MODE/TYPE is padded toward.  The defaults below may be overridden by
   target headers defining AGGREGATE_PADDING_FIXED or
   AGGREGATES_PAD_UPWARD_ALWAYS.  */
10664 static pad_direction
10665 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10667 #ifndef AGGREGATE_PADDING_FIXED
10668 #define AGGREGATE_PADDING_FIXED 0
10670 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10671 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10674   if (!AGGREGATE_PADDING_FIXED)
10676       /* GCC used to pass structures of the same size as integer types as
10677 	 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10678 	 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10679 	 passed padded downward, except that -mstrict-align further
10680 	 muddied the water in that multi-component structures of 2 and 4
10681 	 bytes in size were passed padded upward.
10683 	 The following arranges for best compatibility with previous
10684 	 versions of gcc, but removes the -mstrict-align dependency. */
10687 	  HOST_WIDE_INT size = 0;
/* Only a constant-sized BLKmode type contributes a size; variable-size
   aggregates keep size == 0 and fall through.  */
10689 	  if (mode == BLKmode)
10691 	      if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10692 		size = int_size_in_bytes (type);
10695 	    size = GET_MODE_SIZE (mode);
10697 	  if (size == 1 || size == 2 || size == 4)
10698 	    return PAD_DOWNWARD;
10703   if (AGGREGATES_PAD_UPWARD_ALWAYS)
10705       if (type != 0 && AGGREGATE_TYPE_P (type))
10709   /* Fall back to the default. */
10710   return default_function_arg_padding (mode, type);
10713 /* If defined, a C expression that gives the alignment boundary, in bits,
10714 of an argument with the specified mode and type. If it is not defined,
10715 PARM_BOUNDARY is used for all arguments.
10717 V.4 wants long longs and doubles to be double word aligned. Just
10718 testing the mode size is a boneheaded way to do this as it means
10719 that other types such as complex int are also double word aligned.
10720 However, we're stuck with this because changing the ABI might break
10721 existing library interfaces.
10723 Quadword align Altivec/VSX vectors.
10724 Quadword align large synthetic vector types. */
/* TARGET_FUNCTION_ARG_BOUNDARY: alignment, in bits, for an argument of
   MODE/TYPE.  See the long comment above for the V.4 doubleword rules
   and the quadword-vector rules implemented here.  */
10726 static unsigned int
10727 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10729   machine_mode elt_mode;
/* Homogeneous aggregates are aligned by their element mode.  */
10732   rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10734   if (DEFAULT_ABI == ABI_V4
10735       && (GET_MODE_SIZE (mode) == 8
10736 	  || (TARGET_HARD_FLOAT
10737 	      && !is_complex_IBM_long_double (mode)
10738 	      && FLOAT128_2REG_P (mode))))
10740   else if (FLOAT128_VECTOR_P (mode))
10742   else if (type && TREE_CODE (type) == VECTOR_TYPE
10743 	   && int_size_in_bytes (type) >= 8
10744 	   && int_size_in_bytes (type) < 16)
10746   else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10747 	   || (type && TREE_CODE (type) == VECTOR_TYPE
10748 	       && int_size_in_bytes (type) >= 16))
10751   /* Aggregate types that need > 8 byte alignment are quadword-aligned
10752      in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10753      -mcompat-align-parm is used. */
10754   if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10755        || DEFAULT_ABI == ABI_ELFv2)
10756       && type && TYPE_ALIGN (type) > 64)
10758       /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10759 	 or homogeneous float/vector aggregates here. We already handled
10760 	 vector aggregates above, but still need to check for float here. */
10761       bool aggregate_p = (AGGREGATE_TYPE_P (type)
10762 			  && !SCALAR_FLOAT_MODE_P (elt_mode));
10764       /* We used to check for BLKmode instead of the above aggregate type
10765 	 check. Warn when this results in any difference to the ABI. */
10766       if (aggregate_p != (mode == BLKmode))
/* Emit the GCC 5 ABI-change note at most once per compilation.  */
10768 	  static bool warned;
10769 	  if (!warned && warn_psabi)
10772 	      inform (input_location,
10773 		      "the ABI of passing aggregates with %d-byte alignment"
10774 		      " has changed in GCC 5",
10775 		      (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10783   /* Similar for the Darwin64 ABI. Note that for historical reasons we
10784      implement the "aggregate type" check as a BLKmode check here; this
10785      means certain aggregate types are in fact not aligned. */
10786   if (TARGET_MACHO && rs6000_darwin64_abi
10788       && type && TYPE_ALIGN (type) > 64)
10791   return PARM_BOUNDARY;
10794 /* The offset in words to the start of the parameter save area. */
/* Offset, in words, from the stack pointer to the start of the parameter
   save area: 2 words for SVR4, 4 for ELFv2 (other ABIs handled on the
   line following the visible ternary).  */
10796 static unsigned int
10797 rs6000_parm_offset (void)
10799   return (DEFAULT_ABI == ABI_V4 ? 2
10800 	  : DEFAULT_ABI == ABI_ELFv2 ? 4
10804 /* For a function parm of MODE and TYPE, return the starting word in
10805 the parameter area. NWORDS of the parameter area are already used. */
/* Given that NWORDS of the parameter area are in use, return the word
   index at which a parameter of MODE/TYPE starts, honouring the
   argument's alignment relative to the parameter-area offset.  */
10807 static unsigned int
10808 rs6000_parm_start (machine_mode mode, const_tree type,
10809 		   unsigned int nwords)
10811   unsigned int align;
/* ALIGN is a word-count mask: boundary-in-words minus one.  */
10813   align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10814   return nwords + (-(rs6000_parm_offset () + nwords) & align);
10817 /* Compute the size (in words) of a function argument. */
/* Size of a function argument in parameter-area words (4-byte words on
   32-bit, 8-byte on 64-bit), rounded up.  */
10819 static unsigned long
10820 rs6000_arg_size (machine_mode mode, const_tree type)
10822   unsigned long size;
10824   if (mode != BLKmode)
10825     size = GET_MODE_SIZE (mode);
10827     size = int_size_in_bytes (type);
10830     return (size + 3) >> 2;
10832     return (size + 7) >> 3;
10835 /* Use this to flush pending int fields. */
/* Darwin64 arg-advance helper: account, in CUM->words, for all pending
   integer-typed fields up to bit position BITPOS.  FINAL nonzero means
   this is the last flush for the argument.  */
10838 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10839 					  HOST_WIDE_INT bitpos, int final)
10841   unsigned int startbit, endbit;
10842   int intregs, intoffset;
10844   /* Handle the situations where a float is taking up the first half
10845      of the GPR, and the other half is empty (typically due to
10846      alignment restrictions). We can detect this by a 8-byte-aligned
10847      int field, or by seeing that this is the final flush for this
10848      argument. Count the word and continue on. */
10849   if (cum->floats_in_gpr == 1
10850       && (cum->intoffset % 64 == 0
10851 	  || (cum->intoffset == -1 && final)))
10854       cum->floats_in_gpr = 0;
/* intoffset == -1 means there are no pending int fields to flush.  */
10857   if (cum->intoffset == -1)
10860   intoffset = cum->intoffset;
10861   cum->intoffset = -1;
10862   cum->floats_in_gpr = 0;
10864   if (intoffset % BITS_PER_WORD != 0)
10866       unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
10867       if (!int_mode_for_size (bits, 0).exists ())
10869 	  /* We couldn't find an appropriate mode, which happens,
10870 	     e.g., in packed structs when there are 3 bytes to load.
10871 	     Back intoffset back to the beginning of the word in this
10873 	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
/* Count whole words spanned by [intoffset, bitpos).  */
10877   startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10878   endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10879   intregs = (endbit - startbit) / BITS_PER_WORD;
10880   cum->words += intregs;
10881   /* words should be unsigned. */
10882   if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10884       int pad = (endbit/BITS_PER_WORD) - cum->words;
10889 /* The darwin64 ABI calls for us to recurse down through structs,
10890 looking for elements passed in registers. Unfortunately, we have
10891 to track int register count here also because of misalignments
10892 in powerpc alignment mode. */
/* Darwin64: walk the FIELD_DECLs of record TYPE (which begins at bit
   STARTBITPOS within the argument), advancing CUM per field.  Recurses
   into nested records; see the comment above about GPR tracking.  */
10895 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10897 					    HOST_WIDE_INT startbitpos)
10901   for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10902     if (TREE_CODE (f) == FIELD_DECL)
10904 	HOST_WIDE_INT bitpos = startbitpos;
10905 	tree ftype = TREE_TYPE (f);
/* Skip erroneous fields entirely.  */
10907 	if (ftype == error_mark_node)
10909 	mode = TYPE_MODE (ftype);
10911 	if (DECL_SIZE (f) != 0
10912 	    && tree_fits_uhwi_p (bit_position (f)))
10913 	  bitpos += int_bit_position (f);
10915 	/* ??? FIXME: else assume zero offset. */
10917 	if (TREE_CODE (ftype) == RECORD_TYPE)
10918 	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10919 	else if (USE_FP_FOR_ARG_P (cum, mode))
/* FP field: flush pending ints first, then consume FP registers.  */
10921 	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10922 	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10923 	    cum->fregno += n_fpregs;
10924 	    /* Single-precision floats present a special problem for
10925 	       us, because they are smaller than an 8-byte GPR, and so
10926 	       the structure-packing rules combined with the standard
10927 	       varargs behavior mean that we want to pack float/float
10928 	       and float/int combinations into a single register's
10929 	       space. This is complicated by the arg advance flushing,
10930 	       which works on arbitrarily large groups of int-type
10932 	    if (mode == SFmode)
10934 		if (cum->floats_in_gpr == 1)
10936 		    /* Two floats in a word; count the word and reset
10937 		       the float count. */
10939 		    cum->floats_in_gpr = 0;
10941 		else if (bitpos % 64 == 0)
10943 		    /* A float at the beginning of an 8-byte word;
10944 		       count it and put off adjusting cum->words until
10945 		       we see if a arg advance flush is going to do it
10947 		    cum->floats_in_gpr++;
10951 		    /* The float is at the end of a word, preceded
10952 		       by integer fields, so the arg advance flush
10953 		       just above has already set cum->words and
10954 		       everything is taken care of. */
10958 	      cum->words += n_fpregs;
10960 	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
/* Vector field: flush pending ints; vregno bump is on an elided line.  */
10962 	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
/* Integer-ish field: just remember where the pending run started.  */
10966 	else if (cum->intoffset == -1)
10967 	  cum->intoffset = bitpos;
10971 /* Check for an item that needs to be considered specially under the darwin 64
10972 bit ABI. These are record types where the mode is BLK or the structure is
10973 8 bytes in size. */
/* Return 1 if MODE/TYPE needs the special Darwin64 record handling:
   a BLKmode record of positive size, or any record of exactly 8 bytes.  */
10975 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10977   return rs6000_darwin64_abi
10978 	 && ((mode == BLKmode
10979 	      && TREE_CODE (type) == RECORD_TYPE
10980 	      && int_size_in_bytes (type) > 0)
10981 	  || (type && TREE_CODE (type) == RECORD_TYPE
10982 	      && int_size_in_bytes (type) == 8)) ? 1 : 0;
10985 /* Update the data in CUM to advance over an argument
10986 of mode MODE and data type TYPE.
10987 (TYPE is null for libcalls where that information may not be available.)
10989 Note that for args passed by reference, function_arg will be called
10990 with MODE and TYPE set to that of the pointer to the arg, not the arg
/* Worker for rs6000_function_arg_advance: update *CUM past an argument
   of MODE/TYPE.  NAMED is false for unnamed varargs; DEPTH is nonzero
   when called recursively (only the outermost call ticks off a
   prototype argument).  */
10994 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10995 			       const_tree type, bool named, int depth)
10997   machine_mode elt_mode;
11000   rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11002   /* Only tick off an argument if we're not recursing. */
11004     cum->nargs_prototype--;
/* Record float/vector/long-double usage for the .gnu.attribute output,
   mirroring what init_cumulative_args does for return values.  */
11006 #ifdef HAVE_AS_GNU_ATTRIBUTE
11007   if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11010       if (SCALAR_FLOAT_MODE_P (mode))
11012 	  rs6000_passes_float = true;
11013 	  if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11014 	      && (FLOAT128_IBM_P (mode)
11015 		  || FLOAT128_IEEE_P (mode)
11017 		      && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11018 	    rs6000_passes_long_double = true;
11020 	  /* Note if we passed or return a IEEE 128-bit type. We changed the
11021 	     mangling for these types, and we may need to make an alias with
11022 	     the old mangling. */
11023 	  if (FLOAT128_IEEE_P (mode))
11024 	    rs6000_passes_ieee128 = true;
11026       if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11027 	rs6000_passes_vector = true;
/* Case 1: AltiVec/VSX vector argument (or 16-byte vector type).  */
11031   if (TARGET_ALTIVEC_ABI
11032       && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11033 	  || (type && TREE_CODE (type) == VECTOR_TYPE
11034 	      && int_size_in_bytes (type) == 16)))
11036       bool stack = false;
11038       if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11040 	  cum->vregno += n_elts;
/* Passing a vector argument without AltiVec is a hard error.  */
11042 	  if (!TARGET_ALTIVEC)
11043 	    error ("cannot pass argument in vector register because"
11044 		   " altivec instructions are disabled, use %qs"
11045 		   " to enable them", "-maltivec");
11047 	  /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11048 	     even if it is going to be passed in a vector register.
11049 	     Darwin does the same for variable-argument functions. */
11050 	  if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11052 	      || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11062 	  /* Vector parameters must be 16-byte aligned. In 32-bit
11063 	     mode this means we need to take into account the offset
11064 	     to the parameter save area. In 64-bit mode, they just
11065 	     have to start on an even word, since the parameter save
11066 	     area is 16-byte aligned. */
11068 	    align = -(rs6000_parm_offset () + cum->words) & 3;
11070 	    align = cum->words & 1;
11071 	  cum->words += align + rs6000_arg_size (mode, type);
11073 	  if (TARGET_DEBUG_ARG)
11075 	      fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11076 		       cum->words, align);
11077 	      fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11078 		       cum->nargs_prototype, cum->prototype,
11079 		       GET_MODE_NAME (mode));
/* Case 2: Darwin64 record needing field-by-field accounting.  */
11083   else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11085       int size = int_size_in_bytes (type);
11086       /* Variable sized types have size == -1 and are
11087 	 treated as if consisting entirely of ints.
11088 	 Pad to 16 byte boundary if needed. */
11089       if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11090 	  && (cum->words % 2) != 0)
11092       /* For varargs, we can just go up by the size of the struct. */
11094 	cum->words += (size + 7) / 8;
11097 	  /* It is tempting to say int register count just goes up by
11098 	     sizeof(type)/8, but this is wrong in a case such as
11099 	     { int; double; int; } [powerpc alignment]. We have to
11100 	     grovel through the fields for these too. */
11101 	  cum->intoffset = 0;
11102 	  cum->floats_in_gpr = 0;
11103 	  rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11104 	  rs6000_darwin64_record_arg_advance_flush (cum,
11105 						    size * BITS_PER_UNIT, 1);
11107       if (TARGET_DEBUG_ARG)
11109 	  fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11110 		   cum->words, TYPE_ALIGN (type), size);
11112 		   "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11113 		   cum->nargs_prototype, cum->prototype,
11114 		   GET_MODE_NAME (mode));
/* Case 3: SVR4 (ABI_V4) register assignment.  */
11117   else if (DEFAULT_ABI == ABI_V4)
11119       if (abi_v4_pass_in_fpr (mode, named))
11121 	  /* _Decimal128 must use an even/odd register pair. This assumes
11122 	     that the register number is odd when fregno is odd. */
11123 	  if (mode == TDmode && (cum->fregno % 2) == 1)
11126 	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11127 	      <= FP_ARG_V4_MAX_REG)
11128 	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
/* FPRs exhausted: the value goes to the stack, doubleword aligned for
   8-byte and 128-bit modes.  */
11131 	      cum->fregno = FP_ARG_V4_MAX_REG + 1;
11132 	      if (mode == DFmode || FLOAT128_IBM_P (mode)
11133 		  || mode == DDmode || mode == TDmode)
11134 		cum->words += cum->words & 1;
11135 	      cum->words += rs6000_arg_size (mode, type);
11140 	  int n_words = rs6000_arg_size (mode, type);
11141 	  int gregno = cum->sysv_gregno;
11143 	  /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11144 	     As does any other 2 word item such as complex int due to a
11145 	     historical mistake. */
11147 	    gregno += (1 - gregno) & 1;
11149 	  /* Multi-reg args are not split between registers and stack. */
11150 	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11152 	      /* Long long is aligned on the stack. So are other 2 word
11153 		 items such as complex int due to a historical mistake. */
11155 		cum->words += cum->words & 1;
11156 	      cum->words += n_words;
11159 	  /* Note: continuing to accumulate gregno past when we've started
11160 	     spilling to the stack indicates the fact that we've started
11161 	     spilling to the stack to expand_builtin_saveregs. */
11162 	  cum->sysv_gregno = gregno + n_words;
11165       if (TARGET_DEBUG_ARG)
11167 	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11168 		   cum->words, cum->fregno);
11169 	  fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11170 		   cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11171 	  fprintf (stderr, "mode = %4s, named = %d\n",
11172 		   GET_MODE_NAME (mode), named);
/* Case 4: AIX/ELFv2 parameter-save-area accounting.  */
11177       int n_words = rs6000_arg_size (mode, type);
11178       int start_words = cum->words;
11179       int align_words = rs6000_parm_start (mode, type, start_words);
11181       cum->words = align_words + n_words;
11183       if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11185 	  /* _Decimal128 must be passed in an even/odd float register pair.
11186 	     This assumes that the register number is odd when fregno is
11188 	  if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11190 	  cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11193       if (TARGET_DEBUG_ARG)
11195 	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11196 		   cum->words, cum->fregno);
11197 	  fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11198 		   cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11199 	  fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11200 		   named, align_words - start_words, depth);
/* TARGET_FUNCTION_ARG_ADVANCE hook: thin wrapper that unwraps the
   cumulative-args handle and delegates to the worker at depth 0.  */
11206 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11207 			     const_tree type, bool named)
11209   rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11213 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11214 structure between cum->intoffset and bitpos to integer registers. */
/* A subroutine of rs6000_darwin64_record_arg: assign the pending bits
   between CUM->intoffset and BITPOS to GPRs, appending EXPR_LIST entries
   to RVEC at *K.  Sets CUM->use_stack if the ints spill past the GPRs.  */
11217 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11218 				  HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11221   unsigned int regno;
11222   unsigned int startbit, endbit;
11223   int this_regno, intregs, intoffset;
/* Nothing pending.  */
11226   if (cum->intoffset == -1)
11229   intoffset = cum->intoffset;
11230   cum->intoffset = -1;
11232   /* If this is the trailing part of a word, try to only load that
11233      much into the register. Otherwise load the whole register. Note
11234      that in the latter case we may pick up unwanted bits. It's not a
11235      problem at the moment but may wish to revisit. */
11237   if (intoffset % BITS_PER_WORD != 0)
11239       unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11240       if (!int_mode_for_size (bits, 0).exists (&mode))
11242 	  /* We couldn't find an appropriate mode, which happens,
11243 	     e.g., in packed structs when there are 3 bytes to load.
11244 	     Back intoffset back to the beginning of the word in this
11246 	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11253   startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11254   endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11255   intregs = (endbit - startbit) / BITS_PER_WORD;
11256   this_regno = cum->words + intoffset / BITS_PER_WORD;
/* If the run of ints overflows the GPRs, the whole struct goes to the
   stack (see comment in rs6000_darwin64_record_arg); clamp intregs.  */
11258   if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11259     cum->use_stack = 1;
11261   intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11265   intoffset /= BITS_PER_UNIT;
/* Emit one (reg, byte-offset) pair per integer register consumed.  */
11268       regno = GP_ARG_MIN_REG + this_regno;
11269       reg = gen_rtx_REG (mode, regno);
11271 	gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11274       intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11278   while (intregs > 0);
11281 /* Recursive workhorse for the following. */
/* Recursive workhorse for rs6000_darwin64_record_arg: walk the fields
   of record TYPE starting at bit STARTBITPOS, appending (reg, offset)
   EXPR_LIST entries to RVEC at *K for FP/vector fields and deferring
   integer runs to rs6000_darwin64_record_arg_flush.  */
11284 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11285 				    HOST_WIDE_INT startbitpos, rtx rvec[],
11290   for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11291     if (TREE_CODE (f) == FIELD_DECL)
11293 	HOST_WIDE_INT bitpos = startbitpos;
11294 	tree ftype = TREE_TYPE (f);
11296 	if (ftype == error_mark_node)
11298 	mode = TYPE_MODE (ftype);
11300 	if (DECL_SIZE (f) != 0
11301 	    && tree_fits_uhwi_p (bit_position (f)))
11302 	  bitpos += int_bit_position (f);
11304 	/* ??? FIXME: else assume zero offset. */
11306 	if (TREE_CODE (ftype) == RECORD_TYPE)
11307 	  rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11308 	else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11310 	    unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
/* Complex float fields are passed as their component mode.  */
11314 	      case E_SCmode: mode = SFmode; break;
11315 	      case E_DCmode: mode = DFmode; break;
11316 	      case E_TCmode: mode = TFmode; break;
11320 	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11321 	    if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11323 		gcc_assert (cum->fregno == FP_ARG_MAX_REG
11324 			    && (mode == TFmode || mode == TDmode));
11325 		/* Long double or _Decimal128 split over regs and memory. */
11326 		mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11330 	      = gen_rtx_EXPR_LIST (VOIDmode,
11331 				   gen_rtx_REG (mode, cum->fregno++),
11332 				   GEN_INT (bitpos / BITS_PER_UNIT));
11333 	    if (FLOAT128_2REG_P (mode))
11336 	else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11338 	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11340 	      = gen_rtx_EXPR_LIST (VOIDmode,
11341 				   gen_rtx_REG (mode, cum->vregno++),
11342 				   GEN_INT (bitpos / BITS_PER_UNIT));
/* Otherwise the field is integer-ish; start a pending run if needed.  */
11344 	else if (cum->intoffset == -1)
11345 	  cum->intoffset = bitpos;
11349 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11350 the register(s) to be used for each field and subfield of a struct
11351 being passed by value, along with the offset of where the
11352 register's value may be found in the block. FP fields go in FP
11353 register, vector fields go in vector registers, and everything
11354 else goes in int registers, packed as in memory.
11356 This code is also used for function return values. RETVAL indicates
11357 whether this is the case.
11359 Much of this is taken from the SPARC V9 port, which has a similar
11360 calling convention. */
/* Build the PARALLEL describing how a Darwin64 struct argument (or,
   when RETVAL, return value) of TYPE is distributed across FP, vector
   and integer registers.  Returns NULL_RTX if any part would need
   memory.  See the block comment above for background.  */
11363 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11364 			    bool named, bool retval)
11366   rtx rvec[FIRST_PSEUDO_REGISTER];
11367   int k = 1, kbase = 1;
11368   HOST_WIDE_INT typesize = int_size_in_bytes (type);
11369   /* This is a copy; modifications are not visible to our caller. */
11370   CUMULATIVE_ARGS copy_cum = *orig_cum;
11371   CUMULATIVE_ARGS *cum = &copy_cum;
11373   /* Pad to 16 byte boundary if needed. */
11374   if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11375       && (cum->words % 2) != 0)
11378   cum->intoffset = 0;
11379   cum->use_stack = 0;
11380   cum->named = named;
11382   /* Put entries into rvec[] for individual FP and vector fields, and
11383      for the chunks of memory that go in int regs. Note we start at
11384      element 1; 0 is reserved for an indication of using memory, and
11385      may or may not be filled in below. */
11386   rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11387   rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11389   /* If any part of the struct went on the stack put all of it there.
11390      This hack is because the generic code for
11391      FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11392      parts of the struct are not at the beginning. */
11393   if (cum->use_stack)
11396 	return NULL_RTX;    /* doesn't go in registers at all */
/* Magic NULL_RTX first element signals "also in memory".  */
11398       rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11400   if (k > 1 || cum->use_stack)
11401     return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11406 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
/* Determine where to place an argument of MODE/TYPE in 64-bit mode with
   the 32-bit ABI (-m32 -mpowerpc64): splits the value into SImode GPR
   pieces, with a NULL_RTX first element when part goes to memory.  */
11409 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11414   rtx rvec[GP_ARG_NUM_REG + 1];
/* All GPRs used up: everything goes on the stack.  */
11416   if (align_words >= GP_ARG_NUM_REG)
11419   n_units = rs6000_arg_size (mode, type);
11421   /* Optimize the simple case where the arg fits in one gpr, except in
11422      the case of BLKmode due to assign_parms assuming that registers are
11423      BITS_PER_WORD wide. */
11425       || (n_units == 1 && mode != BLKmode))
11426     return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11429   if (align_words + n_units > GP_ARG_NUM_REG)
11430     /* Not all of the arg fits in gprs. Say that it goes in memory too,
11431        using a magic NULL_RTX component.
11432        This is not strictly correct. Only some of the arg belongs in
11433        memory, not all of it. However, the normal scheme using
11434        function_arg_partial_nregs can result in unusual subregs, eg.
11435        (subreg:SI (reg:DF) 4), which are not handled well. The code to
11436        store the whole arg to memory is often more efficient than code
11437        to store pieces, and we know that space is available in the right
11438        place for the whole arg. */
11439     rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
/* Emit one SImode GPR piece per 4-byte unit that fits.  */
11444       rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11445       rtx off = GEN_INT (i++ * 4);
11446       rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11448   while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11450   return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11453 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11454 but must also be copied into the parameter save area starting at
11455 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11456 to the GPRs and/or memory. Return the number of elements used. */
/* Fill RVEC with the GPR/memory elements for an argument that lives in
   FPRs or VRs but must also be shadowed in the parameter save area at
   word ALIGN_WORDS (see comment above).  Returns the element count.  */
11459 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11460 			   int align_words, rtx *rvec)
11464   if (align_words < GP_ARG_NUM_REG)
11466       int n_words = rs6000_arg_size (mode, type);
11468       if (align_words + n_words > GP_ARG_NUM_REG
11470 	  || (TARGET_32BIT && TARGET_POWERPC64)) 
11472 	  /* If this is partially on the stack, then we only
11473 	     include the portion actually in registers here. */
11474 	  machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11477 	  if (align_words + n_words > GP_ARG_NUM_REG)
11479 	    /* Not all of the arg fits in gprs. Say that it goes in memory
11480 	       too, using a magic NULL_RTX component. Also see comment in
11481 	       rs6000_mixed_function_arg for why the normal
11482 	       function_arg_partial_nregs scheme doesn't work in this case. */
11483 	    rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
/* One word-mode GPR piece per parameter-area word that fits.  */
11488 	      rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11489 	      rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11490 	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11492 	  while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11496 	  /* The whole arg fits in gprs. */
11497 	  rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11498 	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11503       /* It's entirely in memory. */
11504       rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11510 /* RVEC is a vector of K components of an argument of mode MODE.
11511 Construct the final function_arg return value from it. */
/* NOTE(review): the single-element fast paths return on elided lines --
   presumably NULL_RTX for a lone memory element and the bare REG when its
   mode already matches MODE; confirm against the full source.  */
11514 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11516 gcc_assert (k >= 1);
11518 /* Avoid returning a PARALLEL in the trivial cases. */
11521 if (XEXP (rvec[0], 0) == NULL_RTX)
11524 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11525 return XEXP (rvec[0], 0);
11528 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11531 /* Determine where to put an argument to a function.
11532 Value is zero to push the argument on the stack,
11533 or a hard register in which to store the argument.
11535 MODE is the argument's machine mode.
11536 TYPE is the data type of the argument (as a tree).
11537 This is null for libcalls where that information may
11539 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11540 the preceding args and about the function being called. It is
11541 not modified in this routine.
11542 NAMED is nonzero if this argument is a named parameter
11543 (otherwise it is an extra parameter matching an ellipsis).
11545 On RS/6000 the first eight words of non-FP are normally in registers
11546 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11547 Under V.4, the first 8 FP args are in registers.
11549 If this is floating-point and no prototype is specified, we use
11550 both an FP and integer register (or possibly FP reg and stack). Library
11551 functions (when CALL_LIBCALL is set) always have the proper types for args,
11552 so we can pass the FP value just in one register. emit_library_function
11553 doesn't support PARALLEL anyway.
11555 Note that for args passed by reference, function_arg will be called
11556 with MODE and TYPE set to that of the pointer to the arg, not the arg
11560 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11561 const_tree type, bool named)
/* NOTE(review): sampled excerpt -- braces, declarations and some returns are
   on elided lines.  Dispatch order visible here: VOIDmode call-cookie marker,
   darwin64 aggregates, AltiVec/VSX vectors, then ABI_V4, then the AIX-style
   FPR/GPR path.  */
11563 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11564 enum rs6000_abi abi = DEFAULT_ABI;
11565 machine_mode elt_mode;
11568 /* Return a marker to indicate whether CR1 needs to set or clear the
11569 bit that V.4 uses to say fp args were passed in registers.
11570 Assume that we don't need the marker for software floating point,
11571 or compiler generated library calls. */
11572 if (mode == VOIDmode)
11575 && (cum->call_cookie & CALL_LIBCALL) == 0
11577 || (cum->nargs_prototype < 0
11578 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11579 && TARGET_HARD_FLOAT)
11580 return GEN_INT (cum->call_cookie
11581 | ((cum->fregno == FP_ARG_MIN_REG)
11582 ? CALL_V4_SET_FP_ARGS
11583 : CALL_V4_CLEAR_FP_ARGS));
11585 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11588 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11590 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11592 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11593 if (rslt != NULL_RTX)
11595 /* Else fall through to usual handling. */
11598 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11600 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11604 /* Do we also need to pass this argument in the parameter save area?
11605 Library support functions for IEEE 128-bit are assumed to not need the
11606 value passed both in GPRs and in vector registers. */
11607 if (TARGET_64BIT && !cum->prototype
11608 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11610 int align_words = ROUND_UP (cum->words, 2);
11611 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11614 /* Describe where this argument goes in the vector registers. */
11615 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11617 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11618 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11619 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11622 return rs6000_finish_function_arg (mode, rvec, k);
11624 else if (TARGET_ALTIVEC_ABI
11625 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11626 || (type && TREE_CODE (type) == VECTOR_TYPE
11627 && int_size_in_bytes (type) == 16)))
11629 if (named || abi == ABI_V4)
11633 /* Vector parameters to varargs functions under AIX or Darwin
11634 get passed in memory and possibly also in GPRs. */
11635 int align, align_words, n_words;
11636 machine_mode part_mode;
11638 /* Vector parameters must be 16-byte aligned. In 32-bit
11639 mode this means we need to take into account the offset
11640 to the parameter save area. In 64-bit mode, they just
11641 have to start on an even word, since the parameter save
11642 area is 16-byte aligned. */
11644 align = -(rs6000_parm_offset () + cum->words) & 3;
11646 align = cum->words & 1;
11647 align_words = cum->words + align;
11649 /* Out of registers? Memory, then. */
11650 if (align_words >= GP_ARG_NUM_REG)
11653 if (TARGET_32BIT && TARGET_POWERPC64)
11654 return rs6000_mixed_function_arg (mode, type, align_words);
11656 /* The vector value goes in GPRs. Only the part of the
11657 value in GPRs is reported here. */
11659 n_words = rs6000_arg_size (mode, type);
11660 if (align_words + n_words > GP_ARG_NUM_REG)
11661 /* Fortunately, there are only two possibilities, the value
11662 is either wholly in GPRs or half in GPRs and half not. */
11663 part_mode = DImode;
11665 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11669 else if (abi == ABI_V4)
11671 if (abi_v4_pass_in_fpr (mode, named))
11673 /* _Decimal128 must use an even/odd register pair. This assumes
11674 that the register number is odd when fregno is odd. */
11675 if (mode == TDmode && (cum->fregno % 2) == 1)
11678 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11679 <= FP_ARG_V4_MAX_REG)
11680 return gen_rtx_REG (mode, cum->fregno);
11686 int n_words = rs6000_arg_size (mode, type);
11687 int gregno = cum->sysv_gregno;
11689 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11690 As does any other 2 word item such as complex int due to a
11691 historical mistake. */
/* Rounds GREGNO up to an odd register number (r3, r5, ...) for 2-word
   items; GP_ARG_MIN_REG is odd, so "odd" means an even pair start.  */
11693 gregno += (1 - gregno) & 1;
11695 /* Multi-reg args are not split between registers and stack. */
11696 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11699 if (TARGET_32BIT && TARGET_POWERPC64)
11700 return rs6000_mixed_function_arg (mode, type,
11701 gregno - GP_ARG_MIN_REG);
11702 return gen_rtx_REG (mode, gregno);
11707 int align_words = rs6000_parm_start (mode, type, cum->words);
11709 /* _Decimal128 must be passed in an even/odd float register pair.
11710 This assumes that the register number is odd when fregno is odd. */
11711 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11714 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11715 && !(TARGET_AIX && !TARGET_ELF
11716 && type != NULL && AGGREGATE_TYPE_P (type)))
11718 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11721 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11724 /* Do we also need to pass this argument in the parameter
11726 if (type && (cum->nargs_prototype <= 0
11727 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11728 && TARGET_XL_COMPAT
11729 && align_words >= GP_ARG_NUM_REG)))
11730 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11732 /* Describe where this argument goes in the fprs. */
11733 for (i = 0; i < n_elts
11734 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11736 /* Check if the argument is split over registers and memory.
11737 This can only ever happen for long double or _Decimal128;
11738 complex types are handled via split_complex_arg. */
11739 machine_mode fmode = elt_mode;
11740 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11742 gcc_assert (FLOAT128_2REG_P (fmode));
11743 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11746 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11747 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11748 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11751 /* If there were not enough FPRs to hold the argument, the rest
11752 usually goes into memory. However, if the current position
11753 is still within the register parameter area, a portion may
11754 actually have to go into GPRs.
11756 Note that it may happen that the portion of the argument
11757 passed in the first "half" of the first GPR was already
11758 passed in the last FPR as well.
11760 For unnamed arguments, we already set up GPRs to cover the
11761 whole argument in rs6000_psave_function_arg, so there is
11762 nothing further to do at this point. */
11763 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11764 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11765 && cum->nargs_prototype > 0)
11767 static bool warned;
11769 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11770 int n_words = rs6000_arg_size (mode, type);
11772 align_words += fpr_words;
11773 n_words -= fpr_words;
11777 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11778 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11779 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11781 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
/* One-shot -Wpsabi note: GCC 5 changed how homogeneous FP aggregates
   spill from FPRs into GPRs.  */
11783 if (!warned && warn_psabi)
11786 inform (input_location,
11787 "the ABI of passing homogeneous float aggregates"
11788 " has changed in GCC 5");
11792 return rs6000_finish_function_arg (mode, rvec, k);
11794 else if (align_words < GP_ARG_NUM_REG)
11796 if (TARGET_32BIT && TARGET_POWERPC64)
11797 return rs6000_mixed_function_arg (mode, type, align_words);
11799 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11806 /* For an arg passed partly in registers and partly in memory, this is
11807 the number of bytes passed in registers. For args passed entirely in
11808 registers or entirely in memory, zero. When an arg is described by a
11809 PARALLEL, perhaps using more than one register type, this function
11810 returns the number of bytes used by the first element of the PARALLEL. */
/* NOTE(review): sampled excerpt -- some returns/braces are on elided lines.
   Must stay in sync with the PARALLEL construction in rs6000_function_arg.  */
11813 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11814 tree type, bool named)
11816 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11817 bool passed_in_gprs = true;
11820 machine_mode elt_mode;
11823 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11825 if (DEFAULT_ABI == ABI_V4)
11828 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11830 /* If we are passing this arg in the fixed parameter save area (gprs or
11831 memory) as well as VRs, we do not use the partial bytes mechanism;
11832 instead, rs6000_function_arg will return a PARALLEL including a memory
11833 element as necessary. Library support functions for IEEE 128-bit are
11834 assumed to not need the value passed both in GPRs and in vector
11836 if (TARGET_64BIT && !cum->prototype
11837 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11840 /* Otherwise, we pass in VRs only. Check for partial copies. */
11841 passed_in_gprs = false;
11842 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11843 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11846 /* In this complicated case we just disable the partial_nregs code. */
11847 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11850 align_words = rs6000_parm_start (mode, type, cum->words);
11852 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11853 && !(TARGET_AIX && !TARGET_ELF
11854 && type != NULL && AGGREGATE_TYPE_P (type)))
11856 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11858 /* If we are passing this arg in the fixed parameter save area
11859 (gprs or memory) as well as FPRs, we do not use the partial
11860 bytes mechanism; instead, rs6000_function_arg will return a
11861 PARALLEL including a memory element as necessary. */
11863 && (cum->nargs_prototype <= 0
11864 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11865 && TARGET_XL_COMPAT
11866 && align_words >= GP_ARG_NUM_REG)))
11869 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11870 passed_in_gprs = false;
11871 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11873 /* Compute number of bytes / words passed in FPRs. If there
11874 is still space available in the register parameter area
11875 *after* that amount, a part of the argument will be passed
11876 in GPRs. In that case, the total amount passed in any
11877 registers is equal to the amount that would have been passed
11878 in GPRs if everything were passed there, so we fall back to
11879 the GPR code below to compute the appropriate value. */
11880 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11881 * MIN (8, GET_MODE_SIZE (elt_mode)));
11882 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11884 if (align_words + fpr_words < GP_ARG_NUM_REG)
11885 passed_in_gprs = true;
/* GPR fallback: bytes of the arg that fit in the remaining GPRs.  */
11892 && align_words < GP_ARG_NUM_REG
11893 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11894 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11896 if (ret != 0 && TARGET_DEBUG_ARG)
11897 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11902 /* A C expression that indicates when an argument must be passed by
11903 reference. If nonzero for an argument, a copy of that argument is
11904 made in memory and a pointer to the argument is passed instead of
11905 the argument itself. The pointer is passed in whatever way is
11906 appropriate for passing a pointer to that type.
11908 Under V.4, aggregates and long double are passed by reference.
11910 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11911 reference unless the AltiVec vector extension ABI is in force.
11913 As an extension to all ABIs, variable sized types are passed by
/* NOTE(review): sampled excerpt -- return statements of the true branches
   fall on elided lines.  Each guarded case presumably returns nonzero after
   its debug fprintf; confirm against the full source.  */
11917 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11918 machine_mode mode, const_tree type,
11919 bool named ATTRIBUTE_UNUSED)
11924 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11925 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11927 if (TARGET_DEBUG_ARG)
11928 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11932 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11934 if (TARGET_DEBUG_ARG)
11935 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11939 if (int_size_in_bytes (type) < 0)
11941 if (TARGET_DEBUG_ARG)
11942 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11946 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11947 modes only exist for GCC vector types if -maltivec. */
11948 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11950 if (TARGET_DEBUG_ARG)
11951 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11955 /* Pass synthetic vectors in memory. */
11956 if (TREE_CODE (type) == VECTOR_TYPE
11957 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11959 static bool warned_for_pass_big_vectors = false;
11960 if (TARGET_DEBUG_ARG)
11961 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
/* Warn once per compilation, not per argument.  */
11962 if (!warned_for_pass_big_vectors)
11964 warning (OPT_Wpsabi, "GCC vector passed by reference: "
11965 "non-standard ABI extension with no compatibility "
11967 warned_for_pass_big_vectors = true;
11975 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11976 already processes. Return true if the parameter must be passed
11977 (fully or partially) on the stack. */
/* NOTE(review): sampled excerpt -- some returns/braces are elided.  Also
   advances ARGS_SO_FAR as a side effect (see the bottom of the function),
   which is why the complex-type case recurses twice.  */
11980 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11986 /* Catch errors. */
11987 if (type == NULL || type == error_mark_node)
11990 /* Handle types with no storage requirement. */
11991 if (TYPE_MODE (type) == VOIDmode)
11994 /* Handle complex types. */
/* The duplicated recursive call is intentional: a complex is passed as two
   components, so the cumulative-args state must advance once per half.  */
11995 if (TREE_CODE (type) == COMPLEX_TYPE)
11996 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11997 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11999 /* Handle transparent aggregates. */
12000 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12001 && TYPE_TRANSPARENT_AGGR (type))
12002 type = TREE_TYPE (first_field (type));
12004 /* See if this arg was passed by invisible reference. */
12005 if (pass_by_reference (get_cumulative_args (args_so_far),
12006 TYPE_MODE (type), type, true))
12007 type = build_pointer_type (type);
12009 /* Find mode as it is passed by the ABI. */
12010 unsignedp = TYPE_UNSIGNED (type);
12011 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12013 /* If we must pass in stack, we need a stack. */
12014 if (rs6000_must_pass_in_stack (mode, type))
12017 /* If there is no incoming register, we need a stack. */
12018 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12019 if (entry_parm == NULL)
12022 /* Likewise if we need to pass both in registers and on the stack. */
12023 if (GET_CODE (entry_parm) == PARALLEL
12024 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12027 /* Also true if we're partially in registers and partially not. */
12028 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12031 /* Update info on where next arg arrives in registers. */
12032 rs6000_function_arg_advance (args_so_far, mode, type, true);
12036 /* Return true if FUN has no prototype, has a variable argument
12037 list, or passes any parameter in memory. */
/* NOTE(review): sampled excerpt -- some returns/braces are elided.  Walks
   the parameters through rs6000_parm_needs_stack, which mutates the local
   CUMULATIVE_ARGS copy as it goes.  */
12040 rs6000_function_parms_need_stack (tree fun, bool incoming)
12042 tree fntype, result;
12043 CUMULATIVE_ARGS args_so_far_v;
12044 cumulative_args_t args_so_far;
12047 /* Must be a libcall, all of which only use reg parms. */
12052 fntype = TREE_TYPE (fun);
12054 /* Varargs functions need the parameter save area. */
12055 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12058 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12059 args_so_far = pack_cumulative_args (&args_so_far_v);
12061 /* When incoming, we will have been passed the function decl.
12062 It is necessary to use the decl to handle K&R style functions,
12063 where TYPE_ARG_TYPES may not be available. */
12066 gcc_assert (DECL_P (fun));
12067 result = DECL_RESULT (fun);
12070 result = TREE_TYPE (fntype);
/* An aggregate return is passed via a hidden pointer argument, which must
   be accounted for before the real parameters.  */
12072 if (result && aggregate_value_p (result, fntype))
12074 if (!TYPE_P (result))
12075 result = TREE_TYPE (result);
12076 result = build_pointer_type (result);
12077 rs6000_parm_needs_stack (args_so_far, result);
12084 for (parm = DECL_ARGUMENTS (fun);
12085 parm && parm != void_list_node;
12086 parm = TREE_CHAIN (parm))
12087 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12092 function_args_iterator args_iter;
12095 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12096 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12103 /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
12104 usually a constant depending on the ABI. However, in the ELFv2 ABI
12105 the register parameter area is optional when calling a function that
12106 has a prototype is scope, has no variable argument list, and passes
12107 all parameters in registers. */
/* NOTE(review): sampled excerpt -- the case labels of the switch are on
   elided lines.  Visible: one ABI yields 0, AIX-style ABIs yield 64/32
   bytes, and the ELFv2-style case computes it from the parameter list.  */
12110 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12112 int reg_parm_stack_space;
12114 switch (DEFAULT_ABI)
12117 reg_parm_stack_space = 0;
12122 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12126 /* ??? Recomputing this every time is a bit expensive. Is there
12127 a place to cache this information? */
12128 if (rs6000_function_parms_need_stack (fun, incoming))
12129 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12131 reg_parm_stack_space = 0;
12135 return reg_parm_stack_space;
/* Store NREGS consecutive word-mode registers starting at REGNO into
   memory X, one word at a time.  NOTE(review): sampled excerpt -- the
   function's leading comment/head lines are elided.  */
12139 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12142 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12147 for (i = 0; i < nregs; i++)
12149 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
/* After reload, the offsetted address may not be valid; fall back to a
   subreg-derived address and force re-recognition.  */
12150 if (reload_completed)
12152 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12155 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12156 i * GET_MODE_SIZE (reg_mode));
12159 tem = replace_equiv_address (tem, XEXP (tem, 0));
12163 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12167 /* Perform any needed actions needed for a function that is receiving a
12168 variable number of arguments.
12172 MODE and TYPE are the mode and type of the current parameter.
12174 PRETEND_SIZE is a variable that should be set to the amount of stack
12175 that must be pushed by the prolog to pretend that our caller pushed
12178 Normally, this macro will push all remaining incoming registers on the
12179 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* NOTE(review): sampled excerpt -- some braces/else lines are elided.  */
12182 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12183 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12186 CUMULATIVE_ARGS next_cum;
12187 int reg_size = TARGET_32BIT ? 4 : 8;
12188 rtx save_area = NULL_RTX, mem;
12189 int first_reg_offset;
12190 alias_set_type set;
12192 /* Skip the last named argument. */
12193 next_cum = *get_cumulative_args (cum);
12194 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12196 if (DEFAULT_ABI == ABI_V4)
12198 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12202 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12203 HOST_WIDE_INT offset = 0;
12205 /* Try to optimize the size of the varargs save area.
12206 The ABI requires that ap.reg_save_area is doubleword
12207 aligned, but we don't need to allocate space for all
12208 the bytes, only those to which we actually will save
12210 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12211 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12212 if (TARGET_HARD_FLOAT
12213 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12214 && cfun->va_list_fpr_size)
12217 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12218 * UNITS_PER_FP_WORD;
12219 if (cfun->va_list_fpr_size
12220 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12221 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12223 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12224 * UNITS_PER_FP_WORD;
12228 offset = -((first_reg_offset * reg_size) & ~7);
12229 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12231 gpr_reg_num = cfun->va_list_gpr_size;
/* In 32-bit mode an odd starting register costs one extra word to keep
   the save area doubleword aligned.  */
12232 if (reg_size == 4 && (first_reg_offset & 1))
12235 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12238 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12239 * UNITS_PER_FP_WORD
12240 - (int) (GP_ARG_NUM_REG * reg_size);
12242 if (gpr_size + fpr_size)
12245 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12246 gcc_assert (MEM_P (reg_save_area));
12247 reg_save_area = XEXP (reg_save_area, 0);
/* The slot address is either virtual_stack_vars_rtx or that plus a
   constant; fold the constant into OFFSET.  */
12248 if (GET_CODE (reg_save_area) == PLUS)
12250 gcc_assert (XEXP (reg_save_area, 0)
12251 == virtual_stack_vars_rtx);
12252 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12253 offset += INTVAL (XEXP (reg_save_area, 1));
12256 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12259 cfun->machine->varargs_save_offset = offset;
12260 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12265 first_reg_offset = next_cum.words;
12266 save_area = crtl->args.internal_arg_pointer;
12268 if (targetm.calls.must_pass_in_stack (mode, type))
12269 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12272 set = get_varargs_alias_set ();
12273 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12274 && cfun->va_list_gpr_size)
12276 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12278 if (va_list_gpr_counter_field)
12279 /* V4 va_list_gpr_size counts number of registers needed. */
12280 n_gpr = cfun->va_list_gpr_size;
12282 /* char * va_list instead counts number of bytes needed. */
12283 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12288 mem = gen_rtx_MEM (BLKmode,
12289 plus_constant (Pmode, save_area,
12290 first_reg_offset * reg_size));
12291 MEM_NOTRAP_P (mem) = 1;
12292 set_mem_alias_set (mem, set);
12293 set_mem_align (mem, BITS_PER_WORD);
12295 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12299 /* Save FP registers if needed. */
12300 if (DEFAULT_ABI == ABI_V4
12301 && TARGET_HARD_FLOAT
12303 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12304 && cfun->va_list_fpr_size)
12306 int fregno = next_cum.fregno, nregs;
12307 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12308 rtx lab = gen_label_rtx ();
12309 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12310 * UNITS_PER_FP_WORD);
/* Branch around the FPR stores when CR1 says no FP args were passed in
   registers (the V.4 call-cookie protocol).  */
12313 (gen_rtx_SET (pc_rtx,
12314 gen_rtx_IF_THEN_ELSE (VOIDmode,
12315 gen_rtx_NE (VOIDmode, cr1,
12317 gen_rtx_LABEL_REF (VOIDmode, lab),
12321 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12322 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12324 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12325 plus_constant (Pmode, save_area, off));
12326 MEM_NOTRAP_P (mem) = 1;
12327 set_mem_alias_set (mem, set);
12328 set_mem_align (mem, GET_MODE_ALIGNMENT (
12329 TARGET_HARD_FLOAT ? DFmode : SFmode));
12330 emit_move_insn (mem, gen_rtx_REG (
12331 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12338 /* Create the va_list data type. */
/* For ABI_V4 this is the 4.3-BSD-style record { gpr, fpr, reserved,
   overflow_arg_area, reg_save_area }; all other ABIs use a plain
   `char *'.  NOTE(review): sampled excerpt -- a few lines are elided.  */
12341 rs6000_build_builtin_va_list (void)
12343 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12345 /* For AIX, prefer 'char *' because that's what the system
12346 header files like. */
12347 if (DEFAULT_ABI != ABI_V4)
12348 return build_pointer_type (char_type_node);
12350 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12351 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12352 get_identifier ("__va_list_tag"), record);
12354 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12355 unsigned_char_type_node);
12356 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12357 unsigned_char_type_node);
12358 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12359 every user file. */
12360 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12361 get_identifier ("reserved"), short_unsigned_type_node);
12362 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12363 get_identifier ("overflow_arg_area"),
12365 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12366 get_identifier ("reg_save_area"),
/* Remember the counter fields so va_list_{gpr,fpr}_size optimizations can
   find them later.  */
12369 va_list_gpr_counter_field = f_gpr;
12370 va_list_fpr_counter_field = f_fpr;
12372 DECL_FIELD_CONTEXT (f_gpr) = record;
12373 DECL_FIELD_CONTEXT (f_fpr) = record;
12374 DECL_FIELD_CONTEXT (f_res) = record;
12375 DECL_FIELD_CONTEXT (f_ovf) = record;
12376 DECL_FIELD_CONTEXT (f_sav) = record;
12378 TYPE_STUB_DECL (record) = type_decl;
12379 TYPE_NAME (record) = type_decl;
12380 TYPE_FIELDS (record) = f_gpr;
12381 DECL_CHAIN (f_gpr) = f_fpr;
12382 DECL_CHAIN (f_fpr) = f_res;
12383 DECL_CHAIN (f_res) = f_ovf;
12384 DECL_CHAIN (f_ovf) = f_sav;
12386 layout_type (record);
12388 /* The correct type is an array type of one element. */
12389 return build_array_type (record, build_index_type (size_zero_node));
12392 /* Implement va_start. */
/* NOTE(review): sampled excerpt -- some lines (field arguments to build3,
   MIN limits) are elided.  Initializes the V.4 va_list record: gpr/fpr
   counters, overflow area pointer, and register save area pointer.  */
12395 rs6000_va_start (tree valist, rtx nextarg)
12397 HOST_WIDE_INT words, n_gpr, n_fpr;
12398 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12399 tree gpr, fpr, ovf, sav, t;
12401 /* Only SVR4 needs something special. */
12402 if (DEFAULT_ABI != ABI_V4)
12404 std_expand_builtin_va_start (valist, nextarg);
/* Field order must match rs6000_build_builtin_va_list.  */
12408 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12409 f_fpr = DECL_CHAIN (f_gpr);
12410 f_res = DECL_CHAIN (f_fpr);
12411 f_ovf = DECL_CHAIN (f_res);
12412 f_sav = DECL_CHAIN (f_ovf);
12414 valist = build_simple_mem_ref (valist);
12415 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12416 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12418 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12420 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12423 /* Count number of gp and fp argument registers used. */
12424 words = crtl->args.info.words;
12425 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12427 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12430 if (TARGET_DEBUG_ARG)
12431 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12432 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12433 words, n_gpr, n_fpr);
12435 if (cfun->va_list_gpr_size)
12437 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12438 build_int_cst (NULL_TREE, n_gpr));
12439 TREE_SIDE_EFFECTS (t) = 1;
12440 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12443 if (cfun->va_list_fpr_size)
12445 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12446 build_int_cst (NULL_TREE, n_fpr));
12447 TREE_SIDE_EFFECTS (t) = 1;
12448 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12450 #ifdef HAVE_AS_GNU_ATTRIBUTE
12451 if (call_ABI_of_interest (cfun->decl))
12452 rs6000_passes_float = true;
12456 /* Find the overflow area. */
12457 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12459 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12460 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12461 TREE_SIDE_EFFECTS (t) = 1;
12462 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12464 /* If there were no va_arg invocations, don't set up the register
12466 if (!cfun->va_list_gpr_size
12467 && !cfun->va_list_fpr_size
12468 && n_gpr < GP_ARG_NUM_REG
12469 && n_fpr < FP_ARG_V4_MAX_REG)
12472 /* Find the register save area. */
12473 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx)
12474 if (cfun->machine->varargs_save_offset)
12475 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12476 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12477 TREE_SIDE_EFFECTS (t) = 1;
12478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12481 /* Implement va_arg. */
/* Gimplify a VA_ARG_EXPR reading an object of TYPE from VALIST, appending
   setup statements to *PRE_P / *POST_P.  Pass-by-reference arguments recurse
   through a pointer; on non-V4 ABIs small complex values are split into two
   recursive fetches; otherwise the V4 register-save-area / overflow-area
   dance is emitted explicitly.
   NOTE(review): this extract elides many original lines (the embedded line
   numbers jump), so the control flow shown here is incomplete — verify any
   change against the full source file.  */
12484 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12485 gimple_seq *post_p)
12487 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12488 tree gpr, fpr, ovf, sav, reg, t, u;
12489 int size, rsize, n_reg, sav_ofs, sav_scale;
12490 tree lab_false, lab_over, addr;
12492 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
/* Pass-by-reference: fetch the pointer with a recursive call, then
   dereference it.  */
12496 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12498 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12499 return build_va_arg_indirect_ref (t);
12502 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12503 earlier version of gcc, with the property that it always applied alignment
12504 adjustments to the va-args (even for zero-sized types). The cheapest way
12505 to deal with this is to replicate the effect of the part of
12506 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12508 We don't need to check for pass-by-reference because of the test above.
12509 We can return a simplifed answer, since we know there's no offset to add. */
12512 && rs6000_darwin64_abi)
12513 || DEFAULT_ABI == ABI_ELFv2
12514 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12515 && integer_zerop (TYPE_SIZE (type)))
12517 unsigned HOST_WIDE_INT align, boundary;
12518 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12519 align = PARM_BOUNDARY / BITS_PER_UNIT;
12520 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12521 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12522 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12523 boundary /= BITS_PER_UNIT;
12524 if (boundary > align)
12527 /* This updates arg ptr by the amount that would be necessary
12528 to align the zero-sized (but not zero-alignment) item. */
12529 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12530 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12531 gimplify_and_add (t, pre_p);
/* Round the temporary down to a multiple of BOUNDARY (valist_tmp was
   bumped by boundary - 1 above, so this rounds up overall).  */
12533 t = fold_convert (sizetype, valist_tmp);
12534 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12535 fold_convert (TREE_TYPE (valist),
12536 fold_build2 (BIT_AND_EXPR, sizetype, t,
12537 size_int (-boundary))));
12538 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12539 gimplify_and_add (t, pre_p);
12541 /* Since it is zero-sized there's no increment for the item itself. */
12542 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12543 return build_va_arg_indirect_ref (valist_tmp);
12546 if (DEFAULT_ABI != ABI_V4)
12548 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12550 tree elem_type = TREE_TYPE (type);
12551 machine_mode elem_mode = TYPE_MODE (elem_type);
12552 int elem_size = GET_MODE_SIZE (elem_mode);
12554 if (elem_size < UNITS_PER_WORD)
12556 tree real_part, imag_part;
12557 gimple_seq post = NULL;
12559 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12561 /* Copy the value into a temporary, lest the formal temporary
12562 be reused out from under us. */
12563 real_part = get_initialized_tmp_var (real_part, pre_p, &post)
12564 gimple_seq_add_seq (pre_p, post);
12566 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12569 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
/* Everything else on non-V4 ABIs uses the generic handling.  */
12573 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* V4 ABI: pick apart the five fields of the va_list record.  */
12576 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12577 f_fpr = DECL_CHAIN (f_gpr);
12578 f_res = DECL_CHAIN (f_fpr);
12579 f_ovf = DECL_CHAIN (f_res);
12580 f_sav = DECL_CHAIN (f_ovf);
12582 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12583 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12585 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12587 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
/* rsize is the size in 4-byte words; pad is the leading slack when the
   value is smaller than its rounded-up slot (used for right-alignment on
   big-endian below).  */
12590 size = int_size_in_bytes (type);
12591 rsize = (size + 3) / 4;
12592 int pad = 4 * rsize - size;
12595 machine_mode mode = TYPE_MODE (type);
12596 if (abi_v4_pass_in_fpr (mode, false))
12598 /* FP args go in FP registers, if present. */
12600 n_reg = (size + 7) / 8;
12601 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12602 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12603 if (mode != SFmode && mode != SDmode)
12608 /* Otherwise into GP registers. */
12617 /* Pull the value out of the saved registers.... */
12620 addr = create_tmp_var (ptr_type_node, "addr");
12622 /* AltiVec vectors never go in registers when -mabi=altivec. */
12623 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12627 lab_false = create_artificial_label (input_location);
12628 lab_over = create_artificial_label (input_location);
12630 /* Long long is aligned in the registers. As are any other 2 gpr
12631 item such as complex int due to a historical mistake. */
12633 if (n_reg == 2 && reg == gpr)
/* Round the GPR counter up to an even register: reg += reg & 1.  */
12636 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12637 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12638 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12639 unshare_expr (reg), u);
12641 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12642 reg number is 0 for f1, so we want to make it odd. */
12643 else if (reg == fpr && mode == TDmode)
12645 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12646 build_int_cst (TREE_TYPE (reg), 1));
12647 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
/* If the (possibly aligned) register counter is past the last usable
   register, jump to the overflow-area path at lab_false.  */
12650 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12651 t = build2 (GE_EXPR, boolean_type_node, u, t);
12652 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12653 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12654 gimplify_and_add (t, pre_p);
/* addr = sav + sav_ofs + (reg++ * sav_scale).  */
12658 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12660 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12661 build_int_cst (TREE_TYPE (reg), n_reg));
12662 u = fold_convert (sizetype, u);
12663 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12664 t = fold_build_pointer_plus (t, u);
12666 /* _Decimal32 varargs are located in the second word of the 64-bit
12667 FP register for 32-bit binaries. */
12668 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12669 t = fold_build_pointer_plus_hwi (t, size);
12671 /* Args are passed right-aligned. */
12672 if (BYTES_BIG_ENDIAN)
12673 t = fold_build_pointer_plus_hwi (t, pad);
12675 gimplify_assign (addr, t, pre_p);
12677 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12679 stmt = gimple_build_label (lab_false);
12680 gimple_seq_add_stmt (pre_p, stmt);
12682 if ((n_reg == 2 && !regalign) || n_reg > 2)
12684 /* Ensure that we don't find any more args in regs.
12685 Alignment has taken care of for special cases. */
12686 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12690 /* ... otherwise out of the overflow area. */
12692 /* Care for on-stack alignment if needed. */
12696 t = fold_build_pointer_plus_hwi (t, align - 1);
12697 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12698 build_int_cst (TREE_TYPE (t), -align));
12701 /* Args are passed right-aligned. */
12702 if (BYTES_BIG_ENDIAN)
12703 t = fold_build_pointer_plus_hwi (t, pad);
12705 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12707 gimplify_assign (unshare_expr (addr), t, pre_p);
/* Advance the overflow pointer past the fetched object.  */
12709 t = fold_build_pointer_plus_hwi (t, size);
12710 gimplify_assign (unshare_expr (ovf), t, pre_p);
12714 stmt = gimple_build_label (lab_over);
12715 gimple_seq_add_stmt (pre_p, stmt);
/* On strict-alignment targets, an over-aligned value may not be aligned
   in the register save area; copy it through a temporary with memcpy.  */
12718 if (STRICT_ALIGNMENT
12719 && (TYPE_ALIGN (type)
12720 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12722 /* The value (of type complex double, for example) may not be
12723 aligned in memory in the saved registers, so copy via a
12724 temporary. (This is the same code as used for SPARC.) */
12725 tree tmp = create_tmp_var (type, "va_arg_tmp");
12726 tree dest_addr = build_fold_addr_expr (tmp);
12728 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12729 3, dest_addr, addr, size_int (rsize * 4));
12730 TREE_ADDRESSABLE (tmp) = 1;
12732 gimplify_and_add (copy, pre_p);
12736 addr = fold_convert (ptrtype, addr);
12737 return build_va_arg_indirect_ref (addr);
/* Register built-in function NAME of type TYPE with the middle end and
   record its decl under CODE in rs6000_builtin_decls.  Attribute bits from
   rs6000_builtin_info[CODE].attr select const / pure / FP handling on the
   resulting decl.  Fatal error if CODE was already registered.  */
12743 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12746 unsigned classify = rs6000_builtin_info[(int)code].attr;
12747 const char *attr_string = "";
12749 gcc_assert (name != NULL);
12750 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
12752 if (rs6000_builtin_decls[(int)code])
12753 fatal_error (input_location,
12754 "internal error: builtin function %qs already processed",
12757 rs6000_builtin_decls[(int)code] = t =
12758 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12760 /* Set any special attributes. */
12761 if ((classify & RS6000_BTC_CONST) != 0)
12763 /* const function, function only depends on the inputs. */
12764 TREE_READONLY (t) = 1;
12765 TREE_NOTHROW (t) = 1;
12766 attr_string = ", const";
12768 else if ((classify & RS6000_BTC_PURE) != 0)
12770 /* pure function, function can read global memory, but does not set any
12772 DECL_PURE_P (t) = 1;
12773 TREE_NOTHROW (t) = 1;
12774 attr_string = ", pure";
12776 else if ((classify & RS6000_BTC_FP) != 0)
12778 /* Function is a math function. If rounding mode is on, then treat the
12779 function as not reading global memory, but it can have arbitrary side
12780 effects. If it is off, then assume the function is a const function.
12781 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12782 builtin-attribute.def that is used for the math functions. */
12783 TREE_NOTHROW (t) = 1;
12784 if (flag_rounding_math)
12786 DECL_PURE_P (t) = 1;
12787 DECL_IS_NOVOPS (t) = 1;
12788 attr_string = ", fp, pure";
12792 TREE_READONLY (t) = 1;
12793 attr_string = ", fp, const";
12796 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12797 gcc_unreachable ();
/* Trace the registration when -mdebug=builtin is in effect.  */
12799 if (TARGET_DEBUG_BUILTIN)
12800 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12801 (int)code, name, attr_string);
12804 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
/* Redefine the X-macros so that only RS6000_BUILTIN_3 entries expand to
   table rows; re-including rs6000-builtin.def then populates bdesc_3arg.  */
12806 #undef RS6000_BUILTIN_0
12807 #undef RS6000_BUILTIN_1
12808 #undef RS6000_BUILTIN_2
12809 #undef RS6000_BUILTIN_3
12810 #undef RS6000_BUILTIN_A
12811 #undef RS6000_BUILTIN_D
12812 #undef RS6000_BUILTIN_H
12813 #undef RS6000_BUILTIN_P
12814 #undef RS6000_BUILTIN_X
12816 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12817 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12818 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12819 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12820 { MASK, ICODE, NAME, ENUM },
12822 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12823 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12824 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12825 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12826 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12828 static const struct builtin_description bdesc_3arg[] =
12830 #include "rs6000-builtin.def"
12833 /* DST operations: void foo (void *, const int, const char). */
/* Only RS6000_BUILTIN_D entries expand to rows for bdesc_dst.  */
12835 #undef RS6000_BUILTIN_0
12836 #undef RS6000_BUILTIN_1
12837 #undef RS6000_BUILTIN_2
12838 #undef RS6000_BUILTIN_3
12839 #undef RS6000_BUILTIN_A
12840 #undef RS6000_BUILTIN_D
12841 #undef RS6000_BUILTIN_H
12842 #undef RS6000_BUILTIN_P
12843 #undef RS6000_BUILTIN_X
12845 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12846 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12847 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12848 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12849 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12850 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12851 { MASK, ICODE, NAME, ENUM },
12853 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12854 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12855 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12857 static const struct builtin_description bdesc_dst[] =
12859 #include "rs6000-builtin.def"
12862 /* Simple binary operations: VECc = foo (VECa, VECb). */
/* Only RS6000_BUILTIN_2 entries expand to rows for bdesc_2arg.  */
12864 #undef RS6000_BUILTIN_0
12865 #undef RS6000_BUILTIN_1
12866 #undef RS6000_BUILTIN_2
12867 #undef RS6000_BUILTIN_3
12868 #undef RS6000_BUILTIN_A
12869 #undef RS6000_BUILTIN_D
12870 #undef RS6000_BUILTIN_H
12871 #undef RS6000_BUILTIN_P
12872 #undef RS6000_BUILTIN_X
12874 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12875 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12876 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12877 { MASK, ICODE, NAME, ENUM },
12879 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12880 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12881 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12882 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12883 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12884 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12886 static const struct builtin_description bdesc_2arg[] =
12888 #include "rs6000-builtin.def"
/* Only RS6000_BUILTIN_P entries expand to rows for bdesc_altivec_preds.  */
12891 #undef RS6000_BUILTIN_0
12892 #undef RS6000_BUILTIN_1
12893 #undef RS6000_BUILTIN_2
12894 #undef RS6000_BUILTIN_3
12895 #undef RS6000_BUILTIN_A
12896 #undef RS6000_BUILTIN_D
12897 #undef RS6000_BUILTIN_H
12898 #undef RS6000_BUILTIN_P
12899 #undef RS6000_BUILTIN_X
12901 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12902 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12903 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12904 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12905 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12906 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12907 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12908 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12909 { MASK, ICODE, NAME, ENUM },
12911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12913 /* AltiVec predicates. */
12915 static const struct builtin_description bdesc_altivec_preds[] =
12917 #include "rs6000-builtin.def"
12920 /* ABS* operations. */
/* Only RS6000_BUILTIN_A entries expand to rows for bdesc_abs.  */
12922 #undef RS6000_BUILTIN_0
12923 #undef RS6000_BUILTIN_1
12924 #undef RS6000_BUILTIN_2
12925 #undef RS6000_BUILTIN_3
12926 #undef RS6000_BUILTIN_A
12927 #undef RS6000_BUILTIN_D
12928 #undef RS6000_BUILTIN_H
12929 #undef RS6000_BUILTIN_P
12930 #undef RS6000_BUILTIN_X
12932 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12933 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12934 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12935 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12936 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12937 { MASK, ICODE, NAME, ENUM },
12939 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12940 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12941 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12942 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12944 static const struct builtin_description bdesc_abs[] =
12946 #include "rs6000-builtin.def"
12949 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
/* Only RS6000_BUILTIN_1 entries expand to rows for bdesc_1arg.  */
12952 #undef RS6000_BUILTIN_0
12953 #undef RS6000_BUILTIN_1
12954 #undef RS6000_BUILTIN_2
12955 #undef RS6000_BUILTIN_3
12956 #undef RS6000_BUILTIN_A
12957 #undef RS6000_BUILTIN_D
12958 #undef RS6000_BUILTIN_H
12959 #undef RS6000_BUILTIN_P
12960 #undef RS6000_BUILTIN_X
12962 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12963 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12964 { MASK, ICODE, NAME, ENUM },
12966 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12967 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12968 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12969 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12970 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12971 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12972 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12974 static const struct builtin_description bdesc_1arg[] =
12976 #include "rs6000-builtin.def"
12979 /* Simple no-argument operations: result = __builtin_darn_32 () */
/* Only RS6000_BUILTIN_0 entries expand to rows for bdesc_0arg.  */
12981 #undef RS6000_BUILTIN_0
12982 #undef RS6000_BUILTIN_1
12983 #undef RS6000_BUILTIN_2
12984 #undef RS6000_BUILTIN_3
12985 #undef RS6000_BUILTIN_A
12986 #undef RS6000_BUILTIN_D
12987 #undef RS6000_BUILTIN_H
12988 #undef RS6000_BUILTIN_P
12989 #undef RS6000_BUILTIN_X
12991 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
12992 { MASK, ICODE, NAME, ENUM },
12994 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12995 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12996 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12997 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12998 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12999 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13000 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13001 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13003 static const struct builtin_description bdesc_0arg[] =
13005 #include "rs6000-builtin.def"
13008 /* HTM builtins. */
/* Only RS6000_BUILTIN_H entries expand to rows for bdesc_htm; the trailing
   #undefs clean up the X-macro namespace after the last table.  */
13009 #undef RS6000_BUILTIN_0
13010 #undef RS6000_BUILTIN_1
13011 #undef RS6000_BUILTIN_2
13012 #undef RS6000_BUILTIN_3
13013 #undef RS6000_BUILTIN_A
13014 #undef RS6000_BUILTIN_D
13015 #undef RS6000_BUILTIN_H
13016 #undef RS6000_BUILTIN_P
13017 #undef RS6000_BUILTIN_X
13019 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13020 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13021 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13022 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13023 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13024 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13025 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13026 { MASK, ICODE, NAME, ENUM },
13028 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13029 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13031 static const struct builtin_description bdesc_htm[] =
13033 #include "rs6000-builtin.def"
13036 #undef RS6000_BUILTIN_0
13037 #undef RS6000_BUILTIN_1
13038 #undef RS6000_BUILTIN_2
13039 #undef RS6000_BUILTIN_3
13040 #undef RS6000_BUILTIN_A
13041 #undef RS6000_BUILTIN_D
13042 #undef RS6000_BUILTIN_H
13043 #undef RS6000_BUILTIN_P
13045 /* Return true if a builtin function is overloaded. */
/* Tests the RS6000_BTC_OVERLOADED attribute bit in the builtin info table.  */
13047 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13049 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
/* Return the recorded name of builtin FNCODE from the builtin info table.  */
13053 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13055 return rs6000_builtin_info[(int)fncode].name;
13058 /* Expand an expression EXP that calls a builtin without arguments. */
/* Allocates a fresh TARGET register in the insn's result mode when the
   caller's TARGET is missing or unsuitable, then emits the insn.
   __builtin_mffsl is rejected under -msoft-float.  */
13060 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13063 machine_mode tmode = insn_data[icode].operand[0].mode;
13065 if (icode == CODE_FOR_nothing)
13066 /* Builtin not supported on this processor. */
13069 if (icode == CODE_FOR_rs6000_mffsl
13070 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13072 error ("%<__builtin_mffsl%> not supported with %<-msoft-float%>");
13077 || GET_MODE (target) != tmode
13078 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13079 target = gen_reg_rtx (tmode);
13081 pat = GEN_FCN (icode) (target);
/* Expand a call EXP to the mtfsf builtin: operand 0 must be a constant
   8-bit field mask (0..255); both operands are forced into the modes
   demanded by the insn's predicates before emitting.  */
13091 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13094 tree arg0 = CALL_EXPR_ARG (exp, 0);
13095 tree arg1 = CALL_EXPR_ARG (exp, 1);
13096 rtx op0 = expand_normal (arg0);
13097 rtx op1 = expand_normal (arg1);
13098 machine_mode mode0 = insn_data[icode].operand[0].mode;
13099 machine_mode mode1 = insn_data[icode].operand[1].mode;
13101 if (icode == CODE_FOR_nothing)
13102 /* Builtin not supported on this processor. */
13105 /* If we got invalid arguments bail out before generating bad rtl. */
13106 if (arg0 == error_mark_node || arg1 == error_mark_node)
13109 if (!CONST_INT_P (op0)
13110 || INTVAL (op0) > 255
13111 || INTVAL (op0) < 0)
13113 error ("argument 1 must be an 8-bit field value");
13117 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13118 op0 = copy_to_mode_reg (mode0, op0);
13120 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13121 op1 = copy_to_mode_reg (mode1, op1);
13123 pat = GEN_FCN (icode) (op0, op1);
/* Expand a call EXP to __builtin_mtfsb0/__builtin_mtfsb1: the single
   argument must be a constant FPSCR bit number in 0..31 (checked with
   u5bit_cint_operand).  Rejected under -msoft-float.
   Fix: the out-of-range diagnostic now follows GCC diagnostic conventions
   (no leading capital, no trailing period).  */
13132 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
13135 tree arg0 = CALL_EXPR_ARG (exp, 0);
13136 rtx op0 = expand_normal (arg0);
13138 if (icode == CODE_FOR_nothing)
13139 /* Builtin not supported on this processor. */
13142 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13144 error ("%<__builtin_mtfsb0%> and %<__builtin_mtfsb1%> not supported with "
13145 "%<-msoft-float%>");
13149 /* If we got invalid arguments bail out before generating bad rtl. */
13150 if (arg0 == error_mark_node)
13153 /* Only allow bit numbers 0 to 31. */
13154 if (!u5bit_cint_operand (op0, VOIDmode))
13156 error ("argument must be a constant between 0 and 31");
13160 pat = GEN_FCN (icode) (op0);
/* Expand a call EXP to __builtin_set_fpscr_rn: a constant argument must be
   in 0..3 (only the low 2 bits of a variable argument are used to set the
   rounding mode).  Rejected under -msoft-float.
   Fix: the range diagnostic now follows GCC diagnostic conventions
   (no leading capital, no trailing period).  */
13169 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
13172 tree arg0 = CALL_EXPR_ARG (exp, 0);
13173 rtx op0 = expand_normal (arg0);
13174 machine_mode mode0 = insn_data[icode].operand[0].mode;
13176 if (icode == CODE_FOR_nothing)
13177 /* Builtin not supported on this processor. */
13180 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13182 error ("%<__builtin_set_fpscr_rn%> not supported with %<-msoft-float%>");
13186 /* If we got invalid arguments bail out before generating bad rtl. */
13187 if (arg0 == error_mark_node)
13190 /* If the argument is a constant, check the range. Argument can only be a
13191 2-bit value. Unfortunately, can't check the range of the value at
13192 compile time if the argument is a variable. The least significant two
13193 bits of the argument, regardless of type, are used to set the rounding
13194 mode. All other bits are ignored. */
13195 if (CONST_INT_P (op0) && !const_0_to_3_operand(op0, VOIDmode))
13197 error ("argument must be a value between 0 and 3");
13201 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13202 op0 = copy_to_mode_reg (mode0, op0);
13204 pat = GEN_FCN (icode) (op0);
/* Expand a call EXP to __builtin_set_fpscr_drn: a constant argument must be
   in 0..7 (a variable argument's low bits select the decimal rounding mode).
   Fatal in 32-bit mode; rejected under -msoft-float.
   Fixes: diagnostics now follow GCC conventions (no trailing period, no
   leading capital), and the "Agrument" comment typo is corrected.  */
13212 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13215 tree arg0 = CALL_EXPR_ARG (exp, 0);
13216 rtx op0 = expand_normal (arg0);
13217 machine_mode mode0 = insn_data[icode].operand[0].mode;
13220 /* Builtin not supported in 32-bit mode. */
13221 fatal_error (input_location,
13222 "%<__builtin_set_fpscr_drn%> is not supported "
13223 "in 32-bit mode");
13225 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13227 error ("%<__builtin_set_fpscr_drn%> not supported with %<-msoft-float%>");
13231 if (icode == CODE_FOR_nothing)
13232 /* Builtin not supported on this processor. */
13235 /* If we got invalid arguments bail out before generating bad rtl. */
13236 if (arg0 == error_mark_node)
13239 /* If the argument is a constant, check the range. Argument can only be a
13240 3-bit value. Unfortunately, can't check the range of the value at
13241 compile time if the argument is a variable. The least significant two
13242 bits of the argument, regardless of type, are used to set the rounding
13243 mode. All other bits are ignored. */
13244 if (CONST_INT_P (op0) && !const_0_to_7_operand(op0, VOIDmode))
13246 error ("argument must be a value between 0 and 7");
13250 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13251 op0 = copy_to_mode_reg (mode0, op0);
13253 pat = GEN_FCN (icode) (op0);
/* Expand a one-operand builtin call EXP: validate the literal range for the
   vspltis* insns (5-bit signed), pick/allocate TARGET in the insn's result
   mode, force the operand through the insn predicate, and emit.  */
13262 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13265 tree arg0 = CALL_EXPR_ARG (exp, 0);
13266 rtx op0 = expand_normal (arg0);
13267 machine_mode tmode = insn_data[icode].operand[0].mode;
13268 machine_mode mode0 = insn_data[icode].operand[1].mode;
13270 if (icode == CODE_FOR_nothing)
13271 /* Builtin not supported on this processor. */
13274 /* If we got invalid arguments bail out before generating bad rtl. */
13275 if (arg0 == error_mark_node)
13278 if (icode == CODE_FOR_altivec_vspltisb
13279 || icode == CODE_FOR_altivec_vspltish
13280 || icode == CODE_FOR_altivec_vspltisw)
13282 /* Only allow 5-bit *signed* literals. */
13283 if (!CONST_INT_P (op0)
13284 || INTVAL (op0) > 15
13285 || INTVAL (op0) < -16)
13287 error ("argument 1 must be a 5-bit signed literal")
13288 return CONST0_RTX (tmode);
13293 || GET_MODE (target) != tmode
13294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13295 target = gen_reg_rtx (tmode);
13297 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13298 op0 = copy_to_mode_reg (mode0, op0);
13300 pat = GEN_FCN (icode) (target, op0);
/* Expand an AltiVec ABS builtin call EXP: the insn consumes the source
   operand plus two scratch registers in the operand mode.  */
13309 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13311 rtx pat, scratch1, scratch2;
13312 tree arg0 = CALL_EXPR_ARG (exp, 0);
13313 rtx op0 = expand_normal (arg0);
13314 machine_mode tmode = insn_data[icode].operand[0].mode;
13315 machine_mode mode0 = insn_data[icode].operand[1].mode;
13317 /* If we have invalid arguments, bail out before generating bad rtl. */
13318 if (arg0 == error_mark_node)
13322 || GET_MODE (target) != tmode
13323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13324 target = gen_reg_rtx (tmode);
13326 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13327 op0 = copy_to_mode_reg (mode0, op0);
13329 scratch1 = gen_reg_rtx (mode0);
13330 scratch2 = gen_reg_rtx (mode0);
13332 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
/* Expand a two-operand builtin call EXP.  Several insn families require one
   argument to be an N-bit unsigned literal (1..7 bits depending on the
   icode); each check reports an error and returns zero of the result mode
   rather than emitting bad RTL.  */
13341 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13344 tree arg0 = CALL_EXPR_ARG (exp, 0);
13345 tree arg1 = CALL_EXPR_ARG (exp, 1);
13346 rtx op0 = expand_normal (arg0);
13347 rtx op1 = expand_normal (arg1);
13348 machine_mode tmode = insn_data[icode].operand[0].mode;
13349 machine_mode mode0 = insn_data[icode].operand[1].mode;
13350 machine_mode mode1 = insn_data[icode].operand[2].mode;
13352 if (icode == CODE_FOR_nothing)
13353 /* Builtin not supported on this processor. */
13356 /* If we got invalid arguments bail out before generating bad rtl. */
13357 if (arg0 == error_mark_node || arg1 == error_mark_node)
13360 if (icode == CODE_FOR_unpackv1ti
13361 || icode == CODE_FOR_unpackkf
13362 || icode == CODE_FOR_unpacktf
13363 || icode == CODE_FOR_unpackif
13364 || icode == CODE_FOR_unpacktd)
13366 /* Only allow 1-bit unsigned literals. */
13368 if (TREE_CODE (arg1) != INTEGER_CST
13369 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13371 error ("argument 2 must be a 1-bit unsigned literal");
13372 return CONST0_RTX (tmode);
13375 else if (icode == CODE_FOR_altivec_vspltw)
13377 /* Only allow 2-bit unsigned literals. */
13379 if (TREE_CODE (arg1) != INTEGER_CST
13380 || TREE_INT_CST_LOW (arg1) & ~3)
13382 error ("argument 2 must be a 2-bit unsigned literal");
13383 return CONST0_RTX (tmode);
13386 else if (icode == CODE_FOR_altivec_vsplth)
13388 /* Only allow 3-bit unsigned literals. */
13390 if (TREE_CODE (arg1) != INTEGER_CST
13391 || TREE_INT_CST_LOW (arg1) & ~7)
13393 error ("argument 2 must be a 3-bit unsigned literal");
13394 return CONST0_RTX (tmode);
13397 else if (icode == CODE_FOR_altivec_vspltb)
13399 /* Only allow 4-bit unsigned literals. */
13401 if (TREE_CODE (arg1) != INTEGER_CST
13402 || TREE_INT_CST_LOW (arg1) & ~15)
13404 error ("argument 2 must be a 4-bit unsigned literal");
13405 return CONST0_RTX (tmode);
13408 else if (icode == CODE_FOR_altivec_vcfux
13409 || icode == CODE_FOR_altivec_vcfsx
13410 || icode == CODE_FOR_altivec_vctsxs
13411 || icode == CODE_FOR_altivec_vctuxs)
13413 /* Only allow 5-bit unsigned literals. */
13415 if (TREE_CODE (arg1) != INTEGER_CST
13416 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13418 error ("argument 2 must be a 5-bit unsigned literal");
13419 return CONST0_RTX (tmode);
13422 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13423 || icode == CODE_FOR_dfptstsfi_lt_dd
13424 || icode == CODE_FOR_dfptstsfi_gt_dd
13425 || icode == CODE_FOR_dfptstsfi_unordered_dd
13426 || icode == CODE_FOR_dfptstsfi_eq_td
13427 || icode == CODE_FOR_dfptstsfi_lt_td
13428 || icode == CODE_FOR_dfptstsfi_gt_td
13429 || icode == CODE_FOR_dfptstsfi_unordered_td)
13431 /* Only allow 6-bit unsigned literals. */
13433 if (TREE_CODE (arg0) != INTEGER_CST
13434 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13436 error ("argument 1 must be a 6-bit unsigned literal");
13437 return CONST0_RTX (tmode);
13440 else if (icode == CODE_FOR_xststdcqp_kf
13441 || icode == CODE_FOR_xststdcqp_tf
13442 || icode == CODE_FOR_xststdcdp
13443 || icode == CODE_FOR_xststdcsp
13444 || icode == CODE_FOR_xvtstdcdp
13445 || icode == CODE_FOR_xvtstdcsp)
13447 /* Only allow 7-bit unsigned literals. */
13449 if (TREE_CODE (arg1) != INTEGER_CST
13450 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13452 error ("argument 2 must be a 7-bit unsigned literal");
13453 return CONST0_RTX (tmode);
/* Allocate a result register and force operands through the insn
   predicates before emitting.  */
13458 || GET_MODE (target) != tmode
13459 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13460 target = gen_reg_rtx (tmode);
13462 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13463 op0 = copy_to_mode_reg (mode0, op0);
13464 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13465 op1 = copy_to_mode_reg (mode1, op1);
13467 pat = GEN_FCN (icode) (target, op0, op1);
/* Expand an AltiVec predicate builtin call EXP.  Argument 0 is a constant
   selecting which CR6 condition to materialize into TARGET; arguments 1 and
   2 are the vectors compared.  The comparison insn writes a scratch vector
   and sets CR6, which is then tested by one of the gen_cr6_test_* insns.  */
13476 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13479 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13480 tree arg0 = CALL_EXPR_ARG (exp, 1);
13481 tree arg1 = CALL_EXPR_ARG (exp, 2);
13482 rtx op0 = expand_normal (arg0);
13483 rtx op1 = expand_normal (arg1);
13484 machine_mode tmode = SImode;
13485 machine_mode mode0 = insn_data[icode].operand[1].mode;
13486 machine_mode mode1 = insn_data[icode].operand[2].mode;
13489 if (TREE_CODE (cr6_form) != INTEGER_CST)
13491 error ("argument 1 of %qs must be a constant",
13492 "__builtin_altivec_predicate");
13496 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13498 gcc_assert (mode0 == mode1);
13500 /* If we have invalid arguments, bail out before generating bad rtl. */
13501 if (arg0 == error_mark_node || arg1 == error_mark_node)
13505 || GET_MODE (target) != tmode
13506 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13507 target = gen_reg_rtx (tmode);
13509 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13510 op0 = copy_to_mode_reg (mode0, op0);
13511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13512 op1 = copy_to_mode_reg (mode1, op1);
13514 /* Note that for many of the relevant operations (e.g. cmpne or
13515 cmpeq) with float or double operands, it makes more sense for the
13516 mode of the allocated scratch register to select a vector of
13517 integer. But the choice to copy the mode of operand 0 was made
13518 long ago and there are no plans to change it. */
13519 scratch = gen_reg_rtx (mode0);
13521 pat = GEN_FCN (icode) (scratch, op0, op1);
13526 /* The vec_any* and vec_all* predicates use the same opcodes for two
13527 different operations, but the bits in CR6 will be different
13528 depending on what information we want. So we have to play tricks
13529 with CR6 to get the right bits out.
13531 If you think this is disgusting, look at the specs for the
13532 AltiVec predicates. */
13534 switch (cr6_form_int)
13537 emit_insn (gen_cr6_test_for_zero (target));
13540 emit_insn (gen_cr6_test_for_zero_reverse (target));
13543 emit_insn (gen_cr6_test_for_lt (target));
13546 emit_insn (gen_cr6_test_for_lt_reverse (target));
13549 error ("argument 1 of %qs is out of range",
13550 "__builtin_altivec_predicate");
/* Build a V16QI permutation selector (as a register) that byte-swaps
   elements of MODE within a 16-byte vector: swap1/swap2/swap4/swap8 are the
   byte index tables for 1/2/4/8-byte elements.
   NOTE(review): the switch selecting swaparray from MODE is elided from this
   extract — verify against the full source.  */
13558 swap_endian_selector_for_mode (machine_mode mode)
13560 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13561 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13562 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13563 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13565 unsigned int *swaparray, i;
13585 gcc_unreachable ();
13588 for (i = 0; i < 16; ++i)
13589 perm[i] = GEN_INT (swaparray[i]);
13591 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13592 gen_rtvec_v (16, perm)));
/* Expand an AltiVec load builtin call EXP with base + index addressing
   (operands 0 and 1 are Pmode address parts).  For the plain LVX icodes the
   effective address is ANDed with -16 to model the hardware's 16-byte
   alignment truncation; BLK selects a BLKmode memory reference.  */
13596 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13599 tree arg0 = CALL_EXPR_ARG (exp, 0);
13600 tree arg1 = CALL_EXPR_ARG (exp, 1);
13601 machine_mode tmode = insn_data[icode].operand[0].mode;
13602 machine_mode mode0 = Pmode;
13603 machine_mode mode1 = Pmode;
13604 rtx op0 = expand_normal (arg0);
13605 rtx op1 = expand_normal (arg1);
13607 if (icode == CODE_FOR_nothing)
13608 /* Builtin not supported on this processor. */
13611 /* If we got invalid arguments bail out before generating bad rtl. */
13612 if (arg0 == error_mark_node || arg1 == error_mark_node)
13616 || GET_MODE (target) != tmode
13617 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13618 target = gen_reg_rtx (tmode);
13620 op1 = copy_to_mode_reg (mode1, op1);
13622 /* For LVX, express the RTL accurately by ANDing the address with -16.
13623 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13624 so the raw address is fine. */
13625 if (icode == CODE_FOR_altivec_lvx_v1ti
13626 || icode == CODE_FOR_altivec_lvx_v2df
13627 || icode == CODE_FOR_altivec_lvx_v2di
13628 || icode == CODE_FOR_altivec_lvx_v4sf
13629 || icode == CODE_FOR_altivec_lvx_v4si
13630 || icode == CODE_FOR_altivec_lvx_v8hi
13631 || icode == CODE_FOR_altivec_lvx_v16qi
/* Zero offset: the base register alone is the raw address.  */
13634 if (op0 == const0_rtx)
13638 op0 = copy_to_mode_reg (mode0, op0);
13639 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13641 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13642 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13644 emit_insn (gen_rtx_SET (target, addr));
13648 if (op0 == const0_rtx)
13649 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13652 op0 = copy_to_mode_reg (mode0, op0);
13653 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13654 gen_rtx_PLUS (Pmode, op1, op0));
13657 pat = GEN_FCN (icode) (target, addr);
/* Expand a stxvl/xst_len_r style builtin: three operands (value,
   address, length), no result.  Emits the generated insn (the emit and
   return lines are elided in this excerpt).  */
13667 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13670 tree arg0 = CALL_EXPR_ARG (exp, 0);
13671 tree arg1 = CALL_EXPR_ARG (exp, 1);
13672 tree arg2 = CALL_EXPR_ARG (exp, 2);
13673 rtx op0 = expand_normal (arg0);
13674 rtx op1 = expand_normal (arg1);
13675 rtx op2 = expand_normal (arg2);
13676 machine_mode mode0 = insn_data[icode].operand[0].mode;
13677 machine_mode mode1 = insn_data[icode].operand[1].mode;
13678 machine_mode mode2 = insn_data[icode].operand[2].mode;
13680 if (icode == CODE_FOR_nothing)
13681 /* Builtin not supported on this processor. */
13684 /* If we got invalid arguments bail out before generating bad rtl. */
13685 if (arg0 == error_mark_node
13686 || arg1 == error_mark_node
13687 || arg2 == error_mark_node)
/* NOTE(review): predicates are taken from operand[N+1] while the modes
   come from operand[N] — presumably deliberate for this pattern's
   operand layout, but confirm against the stxvl insn definition.  */
13690 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13691 op0 = copy_to_mode_reg (mode0, op0);
13692 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13693 op1 = copy_to_mode_reg (mode1, op1);
13694 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13695 op2 = copy_to_mode_reg (mode2, op2);
13697 pat = GEN_FCN (icode) (op0, op1, op2);
/* Expand an AltiVec/VSX vector-store builtin (stvx/stvxl/stve*x and the
   VSX stores).  ARG0 is the value, ARG1 the offset, ARG2 the base
   address.  Mirrors altivec_expand_lv_builtin: stvx gets an explicit
   16-byte-aligned MEM, everything else keeps the raw address.  */
13705 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13707 tree arg0 = CALL_EXPR_ARG (exp, 0);
13708 tree arg1 = CALL_EXPR_ARG (exp, 1);
13709 tree arg2 = CALL_EXPR_ARG (exp, 2);
13710 rtx op0 = expand_normal (arg0);
13711 rtx op1 = expand_normal (arg1);
13712 rtx op2 = expand_normal (arg2);
13713 rtx pat, addr, rawaddr;
13714 machine_mode tmode = insn_data[icode].operand[0].mode;
13715 machine_mode smode = insn_data[icode].operand[1].mode;
/* Address operands (offset and base) are expanded in Pmode.  */
13716 machine_mode mode1 = Pmode;
13717 machine_mode mode2 = Pmode;
13719 /* Invalid arguments. Bail before doing anything stoopid! */
13720 if (arg0 == error_mark_node
13721 || arg1 == error_mark_node
13722 || arg2 == error_mark_node)
13725 op2 = copy_to_mode_reg (mode2, op2);
13727 /* For STVX, express the RTL accurately by ANDing the address with -16.
13728 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13729 so the raw address is fine. */
13730 if (icode == CODE_FOR_altivec_stvx_v2df
13731 || icode == CODE_FOR_altivec_stvx_v2di
13732 || icode == CODE_FOR_altivec_stvx_v4sf
13733 || icode == CODE_FOR_altivec_stvx_v4si
13734 || icode == CODE_FOR_altivec_stvx_v8hi
13735 || icode == CODE_FOR_altivec_stvx_v16qi)
/* Zero offset: the base alone is the raw address (that assignment is
   elided in this excerpt).  */
13737 if (op1 == const0_rtx)
13741 op1 = copy_to_mode_reg (mode1, op1);
13742 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
/* stvx ignores the low four address bits; model that by masking.  */
13745 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13746 addr = gen_rtx_MEM (tmode, addr);
13748 op0 = copy_to_mode_reg (tmode, op0);
/* Plain SET models the stvx store accurately.  */
13750 emit_insn (gen_rtx_SET (addr, op0));
/* Non-stvx path: legitimize the source value and build the MEM from the
   raw address; the UNSPEC pattern carries the special semantics.  */
13754 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13755 op0 = copy_to_mode_reg (smode, op0);
13757 if (op1 == const0_rtx)
13758 addr = gen_rtx_MEM (tmode, op2);
13761 op1 = copy_to_mode_reg (mode1, op1);
13762 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13765 pat = GEN_FCN (icode) (addr, op0);
13773 /* Return the appropriate SPR number associated with the given builtin. */
13774 static inline HOST_WIDE_INT
13775 htm_spr_num (enum rs6000_builtins code)
/* Map each HTM get/set builtin pair to its SPR.  The returns for the
   first three branches (presumably TFHAR_SPR, TFIAR_SPR, TEXASR_SPR)
   are elided in this excerpt.  */
13777 if (code == HTM_BUILTIN_GET_TFHAR
13778 || code == HTM_BUILTIN_SET_TFHAR)
13780 else if (code == HTM_BUILTIN_GET_TFIAR
13781 || code == HTM_BUILTIN_SET_TFIAR)
13783 else if (code == HTM_BUILTIN_GET_TEXASR
13784 || code == HTM_BUILTIN_SET_TEXASR)
/* Any remaining code must be the TEXASRU accessor pair.  */
13786 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13787 || code == HTM_BUILTIN_SET_TEXASRU);
13788 return TEXASRU_SPR;
13791 /* Return the correct ICODE value depending on whether we are
13792 setting or reading the HTM SPRs. */
13793 static inline enum insn_code
13794 rs6000_htm_spr_icode (bool nonvoid)
/* NONVOID (builtin returns a value) selects mfspr (read); otherwise
   mtspr (write).  The guarding `if (nonvoid)` line is elided here.
   Each picks the DImode variant on 64-bit, SImode otherwise.  */
13797 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13799 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13802 /* Expand the HTM builtin in EXP and store the result in TARGET.
13803 Store true in *EXPANDEDP if we found a builtin to expand. */
13805 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13807 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13808 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13809 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13810 const struct builtin_description *d;
/* The doubleword tabort variants require 64-bit registers.  */
13815 if (!TARGET_POWERPC64
13816 && (fcode == HTM_BUILTIN_TABORTDC
13817 || fcode == HTM_BUILTIN_TABORTDCI))
13819 size_t uns_fcode = (size_t)fcode;
13820 const char *name = rs6000_builtin_info[uns_fcode].name;
13821 error ("builtin %qs is only valid in 64-bit mode", name);
13825 /* Expand the HTM builtins. */
/* Linear search of the HTM builtin table for the matching entry.  */
13827 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13828 if (d->code == fcode)
13830 rtx op[MAX_HTM_OPERANDS], pat;
13833 call_expr_arg_iterator iter;
13834 unsigned attr = rs6000_builtin_info[fcode].attr;
13835 enum insn_code icode = d->icode;
13836 const struct insn_operand_data *insn_op;
13837 bool uses_spr = (attr & RS6000_BTC_SPR);
/* SPR accessors share one mfspr/mtspr pattern chosen by direction.  */
13841 icode = rs6000_htm_spr_icode (nonvoid);
13842 insn_op = &insn_data[icode].operand[0];
/* For a value-returning builtin, operand 0 is the result; legitimize
   or replace TARGET accordingly.  */
13846 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13848 || GET_MODE (target) != tmode
13849 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13850 target = gen_reg_rtx (tmode);
13852 op[nopnds++] = target;
/* Collect and legitimize each call argument into op[].  */
13855 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13857 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13860 insn_op = &insn_data[icode].operand[nopnds];
13862 op[nopnds] = expand_normal (arg);
13864 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
/* Constraint "n" means the operand must be a literal; report a
   user-facing argument number (1-based, skipping the result).  */
13866 if (!strcmp (insn_op->constraint, "n"))
13868 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13869 if (!CONST_INT_P (op[nopnds]))
13870 error ("argument %d must be an unsigned literal", arg_num);
13872 error ("argument %d is an unsigned literal that is "
13873 "out of range", arg_num);
13876 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13882 /* Handle the builtins for extended mnemonics. These accept
13883 no arguments, but map to builtins that take arguments. */
13886 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13887 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13888 op[nopnds++] = GEN_INT (1);
13890 attr |= RS6000_BTC_UNARY;
13892 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13893 op[nopnds++] = GEN_INT (0);
13895 attr |= RS6000_BTC_UNARY;
13901 /* If this builtin accesses SPRs, then pass in the appropriate
13902 SPR number and SPR regno as the last two operands. */
13905 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13906 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13908 /* If this builtin accesses a CR, then pass in a scratch
13909 CR as the last operand. */
13910 else if (attr & RS6000_BTC_CR)
13911 { cr = gen_reg_rtx (CCmode);
/* Sanity-check the operand count against the builtin's arity bits:
   one slot per argument, plus one for a result, plus one extra
   (SPR regno or scratch CR) when applicable.  */
13917 int expected_nopnds = 0;
13918 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13919 expected_nopnds = 1;
13920 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13921 expected_nopnds = 2;
13922 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13923 expected_nopnds = 3;
13924 if (!(attr & RS6000_BTC_VOID))
13925 expected_nopnds += 1;
13927 expected_nopnds += 1;
13929 gcc_assert (nopnds == expected_nopnds
13930 && nopnds <= MAX_HTM_OPERANDS);
/* Dispatch to the generator with the right operand count (the
   switch framing is elided in this excerpt).  */
13936 pat = GEN_FCN (icode) (op[0]);
13939 pat = GEN_FCN (icode) (op[0], op[1]);
13942 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13945 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13948 gcc_unreachable ();
13954 if (attr & RS6000_BTC_CR)
13956 if (fcode == HTM_BUILTIN_TBEGIN)
13958 /* Emit code to set TARGET to true or false depending on
13959 whether the tbegin. instruction successfully or failed
13960 to start a transaction. We do this by placing the 1's
13961 complement of CR's EQ bit into TARGET. */
13962 rtx scratch = gen_reg_rtx (SImode);
13963 emit_insn (gen_rtx_SET (scratch,
13964 gen_rtx_EQ (SImode, cr,
13966 emit_insn (gen_rtx_SET (target,
13967 gen_rtx_XOR (SImode, scratch,
13972 /* Emit code to copy the 4-bit condition register field
13973 CR into the least significant end of register TARGET. */
13974 rtx scratch1 = gen_reg_rtx (SImode)
13975 rtx scratch2 = gen_reg_rtx (SImode);
13976 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13977 emit_insn (gen_movcc (subreg, cr));
/* CR field occupies the top nibble after the move; shift it down
   28 bits and mask to 4 bits.  */
13978 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13979 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
/* No HTM builtin matched; tell the caller to try other tables.  */
13988 *expandedp = false;
13992 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13995 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13998 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13999 if (fcode == RS6000_BUILTIN_CPU_INIT)
/* Result is a 0/1 truth value in SImode.  */
14002 if (target == 0 || GET_MODE (target) != SImode)
14003 target = gen_reg_rtx (SImode);
/* Only usable when glibc publishes HWCAP/platform data in the TCB.  */
14005 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14006 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14007 /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back
14008 to a STRING_CST. */
14009 if (TREE_CODE (arg) == ARRAY_REF
14010 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14011 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14012 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14013 arg = TREE_OPERAND (arg, 0);
14015 if (TREE_CODE (arg) != STRING_CST
14017 error ("builtin %qs only accepts a string argument",
14018 rs6000_builtin_info[(size_t) fcode].name);
/* __builtin_cpu_is: compare the TCB platform word against the
   requested CPU's id.  */
14022 if (fcode == RS6000_BUILTIN_CPU_IS)
14024 const char *cpu = TREE_STRING_POINTER (arg);
14025 rtx cpuid = NULL_RTX;
14026 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14027 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14029 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14030 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14033 if (cpuid == NULL_RTX)
14035 /* Invalid CPU argument. */
14036 error ("cpu %qs is an invalid argument to builtin %qs",
14037 cpu, rs6000_builtin_info[(size_t) fcode].name);
/* Load the platform word from the thread control block: it lives at
   TCB_PLATFORM_OFFSET from the TLS register.  */
14041 rtx platform = gen_reg_rtx (SImode);
14042 rtx tcbmem = gen_const_mem (SImode,
14043 gen_rtx_PLUS (Pmode,
14044 gen_rtx_REG (Pmode, TLS_REGNUM),
14045 GEN_INT (TCB_PLATFORM_OFFSET)));
14046 emit_move_insn (platform, tcbmem);
14047 emit_insn (gen_eqsi3 (target, platform, cpuid));
/* __builtin_cpu_supports: test one HWCAP bit in the TCB.  */
14049 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14051 const char *hwcap = TREE_STRING_POINTER (arg);
14052 rtx mask = NULL_RTX;
14054 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14055 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14057 mask = GEN_INT (cpu_supports_info[i].mask);
14058 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14061 if (mask == NULL_RTX)
14063 /* Invalid HWCAP argument. */
14064 error ("%s %qs is an invalid argument to builtin %qs",
14065 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
/* Load the HWCAP word, AND with the feature mask, and convert the
   result to 0/1: (bits == 0) XOR 1.  */
14069 rtx tcb_hwcap = gen_reg_rtx (SImode);
14070 rtx tcbmem = gen_const_mem (SImode,
14071 gen_rtx_PLUS (Pmode,
14072 gen_rtx_REG (Pmode, TLS_REGNUM),
14073 GEN_INT (hwcap_offset)));
14074 emit_move_insn (tcb_hwcap, tcbmem);
14075 rtx scratch1 = gen_reg_rtx (SImode);
14076 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14077 rtx scratch2 = gen_reg_rtx (SImode);
14078 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14079 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14082 gcc_unreachable ();
14084 /* Record that we have expanded a CPU builtin, so that we can later
14085 emit a reference to the special symbol exported by LIBC to ensure we
14086 do not link against an old LIBC that doesn't support this feature. */
14087 cpu_builtin_p = true;
/* Fallback when the TCB fields are unavailable (pre-2.23 glibc).  */
14090 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
14091 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
14093 /* For old LIBCs, always return FALSE. */
14094 emit_move_insn (target, GEN_INT (0));
14095 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
/* Expand a three-operand builtin, validating the literal-operand
   constraints that individual instructions impose before emitting.  */
14101 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14104 tree arg0 = CALL_EXPR_ARG (exp, 0);
14105 tree arg1 = CALL_EXPR_ARG (exp, 1);
14106 tree arg2 = CALL_EXPR_ARG (exp, 2);
14107 rtx op0 = expand_normal (arg0);
14108 rtx op1 = expand_normal (arg1);
14109 rtx op2 = expand_normal (arg2);
14110 machine_mode tmode = insn_data[icode].operand[0].mode;
14111 machine_mode mode0 = insn_data[icode].operand[1].mode;
14112 machine_mode mode1 = insn_data[icode].operand[2].mode;
14113 machine_mode mode2 = insn_data[icode].operand[3].mode;
14115 if (icode == CODE_FOR_nothing)
14116 /* Builtin not supported on this processor. */
14119 /* If we got invalid arguments bail out before generating bad rtl. */
14120 if (arg0 == error_mark_node
14121 || arg1 == error_mark_node
14122 || arg2 == error_mark_node)
14125 /* Check and prepare argument depending on the instruction code.
14127 Note that a switch statement instead of the sequence of tests
14128 would be incorrect as many of the CODE_FOR values could be
14129 CODE_FOR_nothing and that would yield multiple alternatives
14130 with identical values. We'd never reach here at runtime in
/* vsldoi: the shift amount (argument 3) is a 4-bit literal.  */
14132 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14133 || icode == CODE_FOR_altivec_vsldoi_v2df
14134 || icode == CODE_FOR_altivec_vsldoi_v4si
14135 || icode == CODE_FOR_altivec_vsldoi_v8hi
14136 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14138 /* Only allow 4-bit unsigned literals. */
14140 if (TREE_CODE (arg2) != INTEGER_CST
14141 || TREE_INT_CST_LOW (arg2) & ~0xf)
14143 error ("argument 3 must be a 4-bit unsigned literal");
14144 return CONST0_RTX (tmode);
/* xxpermdi/xxsldwi: argument 3 is a 2-bit literal.  */
14147 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14148 || icode == CODE_FOR_vsx_xxpermdi_v2di
14149 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14150 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14151 || icode == CODE_FOR_vsx_xxpermdi_v1ti
14152 || icode == CODE_FOR_vsx_xxpermdi_v4sf
14153 || icode == CODE_FOR_vsx_xxpermdi_v4si
14154 || icode == CODE_FOR_vsx_xxpermdi_v8hi
14155 || icode == CODE_FOR_vsx_xxpermdi_v16qi
14156 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14157 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14158 || icode == CODE_FOR_vsx_xxsldwi_v4si
14159 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14160 || icode == CODE_FOR_vsx_xxsldwi_v2di
14161 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14163 /* Only allow 2-bit unsigned literals. */
14165 if (TREE_CODE (arg2) != INTEGER_CST
14166 || TREE_INT_CST_LOW (arg2) & ~0x3)
14168 error ("argument 3 must be a 2-bit unsigned literal")
14169 return CONST0_RTX (tmode);
/* vsx_set and the BCD add/sub family: argument 3 is a 1-bit literal.  */
14172 else if (icode == CODE_FOR_vsx_set_v2df
14173 || icode == CODE_FOR_vsx_set_v2di
14174 || icode == CODE_FOR_bcdadd
14175 || icode == CODE_FOR_bcdadd_lt
14176 || icode == CODE_FOR_bcdadd_eq
14177 || icode == CODE_FOR_bcdadd_gt
14178 || icode == CODE_FOR_bcdsub
14179 || icode == CODE_FOR_bcdsub_lt
14180 || icode == CODE_FOR_bcdsub_eq
14181 || icode == CODE_FOR_bcdsub_gt)
14183 /* Only allow 1-bit unsigned literals. */
14185 if (TREE_CODE (arg2) != INTEGER_CST
14186 || TREE_INT_CST_LOW (arg2) & ~0x1)
14188 error ("argument 3 must be a 1-bit unsigned literal");
14189 return CONST0_RTX (tmode);
14192 else if (icode == CODE_FOR_dfp_ddedpd_dd
14193 || icode == CODE_FOR_dfp_ddedpd_td)
14195 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
/* NOTE(review): TREE_CODE is checked on arg0 but the range mask is
   applied to arg2; given the "argument 1" diagnostic, the mask was
   presumably meant for arg0 — confirm against upstream history.  */
14197 if (TREE_CODE (arg0) != INTEGER_CST
14198 || TREE_INT_CST_LOW (arg2) & ~0x3)
14200 error ("argument 1 must be 0 or 2");
14201 return CONST0_RTX (tmode);
/* denbcd: argument 1 is a 1-bit literal.  */
14204 else if (icode == CODE_FOR_dfp_denbcd_dd
14205 || icode == CODE_FOR_dfp_denbcd_td)
14207 /* Only allow 1-bit unsigned literals. */
14209 if (TREE_CODE (arg0) != INTEGER_CST
14210 || TREE_INT_CST_LOW (arg0) & ~0x1)
14212 error ("argument 1 must be a 1-bit unsigned literal");
14213 return CONST0_RTX (tmode);
/* dscli/dscri: argument 2 is a 6-bit shift count literal.  */
14216 else if (icode == CODE_FOR_dfp_dscli_dd
14217 || icode == CODE_FOR_dfp_dscli_td
14218 || icode == CODE_FOR_dfp_dscri_dd
14219 || icode == CODE_FOR_dfp_dscri_td)
14221 /* Only allow 6-bit unsigned literals. */
14223 if (TREE_CODE (arg1) != INTEGER_CST
14224 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14226 error ("argument 2 must be a 6-bit unsigned literal");
14227 return CONST0_RTX (tmode);
/* vshasigma: argument 2 is 0/1 and argument 3 is 0..15.  */
14230 else if (icode == CODE_FOR_crypto_vshasigmaw
14231 || icode == CODE_FOR_crypto_vshasigmad)
14233 /* Check whether the 2nd and 3rd arguments are integer constants and in
14234 range and prepare arguments. */
14236 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14238 error ("argument 2 must be 0 or 1");
14239 return CONST0_RTX (tmode);
14243 if (TREE_CODE (arg2) != INTEGER_CST
14244 || wi::geu_p (wi::to_wide (arg2), 16))
14246 error ("argument 3 must be in the range 0..15");
14247 return CONST0_RTX (tmode);
/* Legitimize the target and all three inputs, then emit.  */
14252 || GET_MODE (target) != tmode
14253 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14254 target = gen_reg_rtx (tmode);
14256 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14257 op0 = copy_to_mode_reg (mode0, op0);
14258 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14259 op1 = copy_to_mode_reg (mode1, op1);
14260 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14261 op2 = copy_to_mode_reg (mode2, op2);
14263 pat = GEN_FCN (icode) (target, op0, op1, op2);
14272 /* Expand the dst builtins. */
14274 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14277 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14278 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14279 tree arg0, arg1, arg2;
14280 machine_mode mode0, mode1;
14281 rtx pat, op0, op1, op2;
14282 const struct builtin_description *d;
/* Assume no match until a table entry is found.  */
14285 *expandedp = false;
14287 /* Handle DST variants. */
/* Linear search of the dst builtin table.  */
14289 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14290 if (d->code == fcode)
14292 arg0 = CALL_EXPR_ARG (exp, 0);
14293 arg1 = CALL_EXPR_ARG (exp, 1);
14294 arg2 = CALL_EXPR_ARG (exp, 2);
14295 op0 = expand_normal (arg0);
14296 op1 = expand_normal (arg1);
14297 op2 = expand_normal (arg2);
14298 mode0 = insn_data[d->icode].operand[0].mode;
14299 mode1 = insn_data[d->icode].operand[1].mode;
14301 /* Invalid arguments, bail out before generating bad rtl. */
14302 if (arg0 == error_mark_node
14303 || arg1 == error_mark_node
14304 || arg2 == error_mark_node)
/* The dst stream selector (argument 3) is a 2-bit literal.  */
14309 if (TREE_CODE (arg2) != INTEGER_CST
14310 || TREE_INT_CST_LOW (arg2) & ~0x3)
14312 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
/* The address operand is legitimized in Pmode.  */
14316 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14317 op0 = copy_to_mode_reg (Pmode, op0);
14318 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14319 op1 = copy_to_mode_reg (mode1, op1);
14321 pat = GEN_FCN (d->icode) (op0, op1, op2);
14331 /* Expand vec_init builtin. */
14333 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14335 machine_mode tmode = TYPE_MODE (type);
14336 machine_mode inner_mode = GET_MODE_INNER (tmode);
14337 int i, n_elt = GET_MODE_NUNITS (tmode);
/* One call argument per vector element is required.  */
14339 gcc_assert (VECTOR_MODE_P (tmode));
14340 gcc_assert (n_elt == call_expr_nargs (exp));
14342 if (!target || !register_operand (target, tmode))
14343 target = gen_reg_rtx (tmode);
/* If we have a vector comprised of a single element, such as V1TImode,
   do the initialization directly with a lowpart move.  */
14347 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14349 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14350 emit_move_insn (target, gen_lowpart (tmode, x));
/* General case: expand each element and hand the PARALLEL to the
   generic vector-init expander.  */
14354 rtvec v = rtvec_alloc (n_elt);
14356 for (i = 0; i < n_elt; ++i)
14358 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14359 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14362 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14368 /* Return the integer constant in ARG. Constrain it to be in the range
14369 of the subparts of VEC_TYPE; issue an error if not. */
14372 get_element_number (tree vec_type, tree arg)
14374 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constants and constants larger than the last lane index.
   (The error path's return value is elided in this excerpt.)  */
14376 if (!tree_fits_uhwi_p (arg)
14377 || (elt = tree_to_uhwi (arg), elt > max))
14379 error ("selector must be an integer constant in the range 0..%wi", max);
14386 /* Expand vec_set builtin. */
14388 altivec_expand_vec_set_builtin (tree exp)
14390 machine_mode tmode, mode1;
14391 tree arg0, arg1, arg2;
/* arg0 = vector, arg1 = new element value, arg2 = constant lane.  */
14395 arg0 = CALL_EXPR_ARG (exp, 0);
14396 arg1 = CALL_EXPR_ARG (exp, 1);
14397 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the vector's mode, mode1 its element mode.  */
14399 tmode = TYPE_MODE (TREE_TYPE (arg0));
14400 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14401 gcc_assert (VECTOR_MODE_P (tmode));
14403 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14404 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Diagnoses out-of-range or non-constant selectors.  */
14405 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Coerce the element value into the element mode if needed.  */
14407 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14408 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14410 op0 = force_reg (tmode, op0);
14411 op1 = force_reg (mode1, op1);
14413 rs6000_expand_vector_set (op0, op1, elt);
14418 /* Expand vec_ext builtin. */
14420 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14422 machine_mode tmode, mode0;
/* arg0 = vector, arg1 = lane selector (constant or variable).  */
14427 arg0 = CALL_EXPR_ARG (exp, 0);
14428 arg1 = CALL_EXPR_ARG (exp, 1);
14430 op0 = expand_normal (arg0);
14431 op1 = expand_normal (arg1);
/* Constant selectors are reduced modulo the lane count up front.  */
14433 if (TREE_CODE (arg1) == INTEGER_CST)
14435 unsigned HOST_WIDE_INT elt;
14436 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
14437 unsigned int truncated_selector;
14438 /* Even if !tree_fits_uhwi_p (arg1)), TREE_INT_CST_LOW (arg0)
14439 returns low-order bits of INTEGER_CST for modulo indexing. */
14440 elt = TREE_INT_CST_LOW (arg1);
14441 truncated_selector = elt % size;
14442 op1 = GEN_INT (truncated_selector);
/* tmode is the element mode, mode0 the vector's mode.  */
14445 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14446 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14447 gcc_assert (VECTOR_MODE_P (mode0));
14449 op0 = force_reg (mode0, op0);
/* Under optimization always use a fresh pseudo for the result.  */
14451 if (optimize || !target || !register_operand (target, tmode))
14452 target = gen_reg_rtx (tmode);
14454 rs6000_expand_vector_extract (target, op0, op1);
14459 /* Expand the builtin in EXP and store the result in TARGET. Store
14460 true in *EXPANDEDP if we found a builtin to expand. */
14462 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14464 const struct builtin_description *d;
14466 enum insn_code icode;
14467 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14468 tree arg0, arg1, arg2;
14470 machine_mode tmode, mode0;
14471 enum rs6000_builtins fcode
14472 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14474 if (rs6000_overloaded_builtin_p (fcode))
14477 error ("unresolved overload for Altivec builtin %qF", fndecl);
14479 /* Given it is invalid, just generate a normal call. */
14480 return expand_call (exp, target, false);
14483 target = altivec_expand_dst_builtin (exp, target, expandedp);
14491 case ALTIVEC_BUILTIN_STVX_V2DF:
14492 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14493 case ALTIVEC_BUILTIN_STVX_V2DI:
14494 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14495 case ALTIVEC_BUILTIN_STVX_V4SF:
14496 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14497 case ALTIVEC_BUILTIN_STVX:
14498 case ALTIVEC_BUILTIN_STVX_V4SI:
14499 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14500 case ALTIVEC_BUILTIN_STVX_V8HI:
14501 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14502 case ALTIVEC_BUILTIN_STVX_V16QI:
14503 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14504 case ALTIVEC_BUILTIN_STVEBX:
14505 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14506 case ALTIVEC_BUILTIN_STVEHX:
14507 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14508 case ALTIVEC_BUILTIN_STVEWX:
14509 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14510 case ALTIVEC_BUILTIN_STVXL_V2DF:
14511 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14512 case ALTIVEC_BUILTIN_STVXL_V2DI:
14513 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14514 case ALTIVEC_BUILTIN_STVXL_V4SF:
14515 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14516 case ALTIVEC_BUILTIN_STVXL:
14517 case ALTIVEC_BUILTIN_STVXL_V4SI:
14518 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14519 case ALTIVEC_BUILTIN_STVXL_V8HI:
14520 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14521 case ALTIVEC_BUILTIN_STVXL_V16QI:
14522 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14524 case ALTIVEC_BUILTIN_STVLX:
14525 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14526 case ALTIVEC_BUILTIN_STVLXL:
14527 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14528 case ALTIVEC_BUILTIN_STVRX:
14529 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14530 case ALTIVEC_BUILTIN_STVRXL:
14531 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14533 case P9V_BUILTIN_STXVL:
14534 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14536 case P9V_BUILTIN_XST_LEN_R:
14537 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14539 case VSX_BUILTIN_STXVD2X_V1TI:
14540 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14541 case VSX_BUILTIN_STXVD2X_V2DF:
14542 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14543 case VSX_BUILTIN_STXVD2X_V2DI:
14544 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14545 case VSX_BUILTIN_STXVW4X_V4SF:
14546 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14547 case VSX_BUILTIN_STXVW4X_V4SI:
14548 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14549 case VSX_BUILTIN_STXVW4X_V8HI:
14550 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14551 case VSX_BUILTIN_STXVW4X_V16QI:
14552 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14554 /* For the following on big endian, it's ok to use any appropriate
14555 unaligned-supporting store, so use a generic expander. For
14556 little-endian, the exact element-reversing instruction must
14558 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14560 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14561 : CODE_FOR_vsx_st_elemrev_v1ti);
14562 return altivec_expand_stv_builtin (code, exp);
14564 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14566 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14567 : CODE_FOR_vsx_st_elemrev_v2df);
14568 return altivec_expand_stv_builtin (code, exp);
14570 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14572 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14573 : CODE_FOR_vsx_st_elemrev_v2di);
14574 return altivec_expand_stv_builtin (code, exp);
14576 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14578 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14579 : CODE_FOR_vsx_st_elemrev_v4sf);
14580 return altivec_expand_stv_builtin (code, exp);
14582 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14584 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14585 : CODE_FOR_vsx_st_elemrev_v4si);
14586 return altivec_expand_stv_builtin (code, exp);
14588 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14590 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14591 : CODE_FOR_vsx_st_elemrev_v8hi);
14592 return altivec_expand_stv_builtin (code, exp);
14594 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14596 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14597 : CODE_FOR_vsx_st_elemrev_v16qi);
14598 return altivec_expand_stv_builtin (code, exp);
14601 case ALTIVEC_BUILTIN_MFVSCR:
14602 icode = CODE_FOR_altivec_mfvscr;
14603 tmode = insn_data[icode].operand[0].mode;
14606 || GET_MODE (target) != tmode
14607 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14608 target = gen_reg_rtx (tmode);
14610 pat = GEN_FCN (icode) (target);
14616 case ALTIVEC_BUILTIN_MTVSCR:
14617 icode = CODE_FOR_altivec_mtvscr;
14618 arg0 = CALL_EXPR_ARG (exp, 0);
14619 op0 = expand_normal (arg0);
14620 mode0 = insn_data[icode].operand[0].mode;
14622 /* If we got invalid arguments bail out before generating bad rtl. */
14623 if (arg0 == error_mark_node)
14626 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14627 op0 = copy_to_mode_reg (mode0, op0);
14629 pat = GEN_FCN (icode) (op0);
14634 case ALTIVEC_BUILTIN_DSSALL:
14635 emit_insn (gen_altivec_dssall ());
14638 case ALTIVEC_BUILTIN_DSS:
14639 icode = CODE_FOR_altivec_dss;
14640 arg0 = CALL_EXPR_ARG (exp, 0);
14642 op0 = expand_normal (arg0);
14643 mode0 = insn_data[icode].operand[0].mode;
14645 /* If we got invalid arguments bail out before generating bad rtl. */
14646 if (arg0 == error_mark_node)
14649 if (TREE_CODE (arg0) != INTEGER_CST
14650 || TREE_INT_CST_LOW (arg0) & ~0x3)
14652 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14656 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14657 op0 = copy_to_mode_reg (mode0, op0);
14659 emit_insn (gen_altivec_dss (op0));
14662 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14663 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14664 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14665 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14666 case VSX_BUILTIN_VEC_INIT_V2DF:
14667 case VSX_BUILTIN_VEC_INIT_V2DI:
14668 case VSX_BUILTIN_VEC_INIT_V1TI:
14669 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14671 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14672 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14673 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14674 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14675 case VSX_BUILTIN_VEC_SET_V2DF:
14676 case VSX_BUILTIN_VEC_SET_V2DI:
14677 case VSX_BUILTIN_VEC_SET_V1TI:
14678 return altivec_expand_vec_set_builtin (exp);
14680 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14681 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14682 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14683 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14684 case VSX_BUILTIN_VEC_EXT_V2DF:
14685 case VSX_BUILTIN_VEC_EXT_V2DI:
14686 case VSX_BUILTIN_VEC_EXT_V1TI:
14687 return altivec_expand_vec_ext_builtin (exp, target);
14689 case P9V_BUILTIN_VEC_EXTRACT4B:
14690 arg1 = CALL_EXPR_ARG (exp, 1);
14693 /* Generate a normal call if it is invalid. */
14694 if (arg1 == error_mark_node)
14695 return expand_call (exp, target, false);
14697 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14699 error ("second argument to %qs must be 0..12", "vec_vextract4b");
14700 return expand_call (exp, target, false);
14704 case P9V_BUILTIN_VEC_INSERT4B:
14705 arg2 = CALL_EXPR_ARG (exp, 2);
14708 /* Generate a normal call if it is invalid. */
14709 if (arg2 == error_mark_node)
14710 return expand_call (exp, target, false);
14712 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14714 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
14715 return expand_call (exp, target, false);
14721 /* Fall through. */
14724 /* Expand abs* operations. */
14726 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14727 if (d->code == fcode)
14728 return altivec_expand_abs_builtin (d->icode, exp, target);
14730 /* Expand the AltiVec predicates. */
14731 d = bdesc_altivec_preds;
14732 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14733 if (d->code == fcode)
14734 return altivec_expand_predicate_builtin (d->icode, exp, target);
14736 /* LV* are funky. We initialized them differently. */
14739 case ALTIVEC_BUILTIN_LVSL:
14740 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14741 exp, target, false);
14742 case ALTIVEC_BUILTIN_LVSR:
14743 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14744 exp, target, false);
14745 case ALTIVEC_BUILTIN_LVEBX:
14746 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14747 exp, target, false);
14748 case ALTIVEC_BUILTIN_LVEHX:
14749 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14750 exp, target, false);
14751 case ALTIVEC_BUILTIN_LVEWX:
14752 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14753 exp, target, false);
14754 case ALTIVEC_BUILTIN_LVXL_V2DF:
14755 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14756 exp, target, false);
14757 case ALTIVEC_BUILTIN_LVXL_V2DI:
14758 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14759 exp, target, false);
14760 case ALTIVEC_BUILTIN_LVXL_V4SF:
14761 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14762 exp, target, false);
14763 case ALTIVEC_BUILTIN_LVXL:
14764 case ALTIVEC_BUILTIN_LVXL_V4SI:
14765 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14766 exp, target, false);
14767 case ALTIVEC_BUILTIN_LVXL_V8HI:
14768 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14769 exp, target, false);
14770 case ALTIVEC_BUILTIN_LVXL_V16QI:
14771 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14772 exp, target, false);
14773 case ALTIVEC_BUILTIN_LVX_V1TI:
14774 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14775 exp, target, false);
14776 case ALTIVEC_BUILTIN_LVX_V2DF:
14777 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14778 exp, target, false);
14779 case ALTIVEC_BUILTIN_LVX_V2DI:
14780 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14781 exp, target, false);
14782 case ALTIVEC_BUILTIN_LVX_V4SF:
14783 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14784 exp, target, false);
14785 case ALTIVEC_BUILTIN_LVX:
14786 case ALTIVEC_BUILTIN_LVX_V4SI:
14787 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14788 exp, target, false);
14789 case ALTIVEC_BUILTIN_LVX_V8HI:
14790 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14791 exp, target, false);
14792 case ALTIVEC_BUILTIN_LVX_V16QI:
14793 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14794 exp, target, false);
14795 case ALTIVEC_BUILTIN_LVLX:
14796 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14797 exp, target, true);
14798 case ALTIVEC_BUILTIN_LVLXL:
14799 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14800 exp, target, true);
14801 case ALTIVEC_BUILTIN_LVRX:
14802 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14803 exp, target, true);
14804 case ALTIVEC_BUILTIN_LVRXL:
14805 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14806 exp, target, true);
14807 case VSX_BUILTIN_LXVD2X_V1TI:
14808 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14809 exp, target, false);
14810 case VSX_BUILTIN_LXVD2X_V2DF:
14811 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14812 exp, target, false);
14813 case VSX_BUILTIN_LXVD2X_V2DI:
14814 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14815 exp, target, false);
14816 case VSX_BUILTIN_LXVW4X_V4SF:
14817 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14818 exp, target, false);
14819 case VSX_BUILTIN_LXVW4X_V4SI:
14820 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14821 exp, target, false);
14822 case VSX_BUILTIN_LXVW4X_V8HI:
14823 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14824 exp, target, false);
14825 case VSX_BUILTIN_LXVW4X_V16QI:
14826 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14827 exp, target, false);
14828 /* For the following on big endian, it's ok to use any appropriate
14829 unaligned-supporting load, so use a generic expander. For
14830 little-endian, the exact element-reversing instruction must
14832 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14834 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14835 : CODE_FOR_vsx_ld_elemrev_v2df);
14836 return altivec_expand_lv_builtin (code, exp, target, false);
14838 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14840 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14841 : CODE_FOR_vsx_ld_elemrev_v1ti);
14842 return altivec_expand_lv_builtin (code, exp, target, false);
14844 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14846 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14847 : CODE_FOR_vsx_ld_elemrev_v2di);
14848 return altivec_expand_lv_builtin (code, exp, target, false);
14850 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14852 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14853 : CODE_FOR_vsx_ld_elemrev_v4sf);
14854 return altivec_expand_lv_builtin (code, exp, target, false);
14856 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14858 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14859 : CODE_FOR_vsx_ld_elemrev_v4si);
14860 return altivec_expand_lv_builtin (code, exp, target, false);
14862 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14864 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14865 : CODE_FOR_vsx_ld_elemrev_v8hi);
14866 return altivec_expand_lv_builtin (code, exp, target, false);
14868 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14870 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14871 : CODE_FOR_vsx_ld_elemrev_v16qi);
14872 return altivec_expand_lv_builtin (code, exp, target, false);
14877 /* Fall through. */
14880 *expandedp = false;
14884 /* Check whether a builtin function is supported in this target
   configuration, i.e. whether every ISA/option bit (RS6000_BTM_*)
   required by FNCODE is currently enabled.  */
14887 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
/* Mask of the option bits this builtin requires.  */
14889 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
/* Supported only if all required bits are present in the global
   rs6000_builtin_mask.  NOTE(review): the return statements are on
   lines elided from this view.  */
14890 if ((fnmask & rs6000_builtin_mask) != fnmask)
14896 /* Raise an error message for a builtin function that is called without the
14897 appropriate target options being set. */
14900 rs6000_invalid_builtin (enum rs6000_builtins fncode)
/* Look up the builtin's name and its required option mask.  */
14902 size_t uns_fncode = (size_t) fncode;
14903 const char *name = rs6000_builtin_info[uns_fncode].name;
14904 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14906 gcc_assert (name != NULL);
/* Test mask bits from most specific to least specific so the
   diagnostic names the exact option (or option pair) that is missing
   rather than falling through to the generic message at the end.  */
14907 if ((fnmask & RS6000_BTM_CELL) != 0)
14908 error ("builtin function %qs is only valid for the cell processor", name);
14909 else if ((fnmask & RS6000_BTM_VSX) != 0)
14910 error ("builtin function %qs requires the %qs option", name, "-mvsx");
14911 else if ((fnmask & RS6000_BTM_HTM) != 0)
14912 error ("builtin function %qs requires the %qs option", name, "-mhtm");
14913 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14914 error ("builtin function %qs requires the %qs option", name, "-maltivec");
/* Combined requirements (both bits set) are checked before their
   individual components so the two-option message is preferred.  */
14915 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14916 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14917 error ("builtin function %qs requires the %qs and %qs options",
14918 name, "-mhard-dfp", "-mpower8-vector");
14919 else if ((fnmask & RS6000_BTM_DFP) != 0)
14920 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
14921 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14922 error ("builtin function %qs requires the %qs option", name,
14923 "-mpower8-vector")
14924 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14925 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14926 error ("builtin function %qs requires the %qs and %qs options",
14927 name, "-mcpu=power9", "-m64");
14928 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14929 error ("builtin function %qs requires the %qs option", name,
14931 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14932 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14933 error ("builtin function %qs requires the %qs and %qs options",
14934 name, "-mcpu=power9", "-m64");
14935 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
14936 error ("builtin function %qs requires the %qs option", name,
/* 128-bit long double builtins: distinguish a missing hard-float
   configuration from a long-double-format mismatch.  */
14938 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
14940 if (!TARGET_HARD_FLOAT)
14941 error ("builtin function %qs requires the %qs option", name,
14944 error ("builtin function %qs requires the %qs option", name,
14945 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
14947 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14948 error ("builtin function %qs requires the %qs option", name,
14950 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
14951 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
14953 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
14954 error ("builtin function %qs requires the %qs option", name,
14956 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14957 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14958 error ("builtin function %qs requires the %qs (or newer), and "
14959 "%qs or %qs options",
14960 name, "-mcpu=power7", "-m64", "-mpowerpc64");
/* Fallback when no specific mask bit matched.  */
14962 error ("builtin function %qs is not supported with the current options",
14966 /* Target hook for early folding of built-ins, shamelessly stolen
   (continuation elided in this view).  Delegates entirely to the
   subtarget's folder when one is defined; otherwise no folding is
   done here.  */
14970 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
14971 int n_args ATTRIBUTE_UNUSED,
14972 tree *args ATTRIBUTE_UNUSED,
14973 bool ignore ATTRIBUTE_UNUSED)
/* Let the subtarget attempt the fold if it provides a hook; the
   fallback return is on a line elided from this view.  */
14975 #ifdef SUBTARGET_FOLD_BUILTIN
14976 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
14982 /* Helper function to sort out which built-ins may be valid without having
   a LHS, i.e. calls whose result is unused.  These are the vector
   store builtins, which are executed purely for their effect on
   memory.  */
14985 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
/* AltiVec and VSX vector stores return nothing meaningful, so they
   must still be folded/expanded when the result is dropped.  */
14989 case ALTIVEC_BUILTIN_STVX_V16QI:
14990 case ALTIVEC_BUILTIN_STVX_V8HI:
14991 case ALTIVEC_BUILTIN_STVX_V4SI:
14992 case ALTIVEC_BUILTIN_STVX_V4SF:
14993 case ALTIVEC_BUILTIN_STVX_V2DI:
14994 case ALTIVEC_BUILTIN_STVX_V2DF:
14995 case VSX_BUILTIN_STXVW4X_V16QI:
14996 case VSX_BUILTIN_STXVW4X_V8HI:
14997 case VSX_BUILTIN_STXVW4X_V4SF:
14998 case VSX_BUILTIN_STXVW4X_V4SI:
14999 case VSX_BUILTIN_STXVD2X_V2DF:
15000 case VSX_BUILTIN_STXVD2X_V2DI:
15007 /* Helper function to handle the gimple folding of a vector compare
15008 operation. This sets up true/false vectors, and uses the
15009 VEC_COND_EXPR operation.
15010 CODE indicates which comparison is to be made. (EQ, GT, ...).
15011 TYPE indicates the type of the result.
   ARG0/ARG1 are the two vector operands being compared.  */
15013 fold_build_vec_cmp (tree_code code, tree type,
15014 tree arg0, tree arg1)
/* Boolean vector type with the same element count/size as TYPE.  */
15016 tree cmp_type = build_same_sized_truth_vector_type (type);
15017 tree zero_vec = build_zero_cst (type);
15018 tree minus_one_vec = build_minus_one_cst (type);
/* Elementwise compare, then per lane select all-ones for true and
   all-zeros for false (the AltiVec predicate-result convention).  */
15019 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
15020 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
15023 /* Helper function to handle the in-between steps for the
15024 vector compare built-ins: fetch the call operands, build the folded
   comparison via fold_build_vec_cmp, and replace the builtin call
   STMT (at iterator GSI) with a plain assignment of the result.  */
15026 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
15028 tree arg0 = gimple_call_arg (stmt, 0);
15029 tree arg1 = gimple_call_arg (stmt, 1);
15030 tree lhs = gimple_call_lhs (stmt);
/* Result type is taken from the LHS of the original call.  */
15031 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
15032 gimple *g = gimple_build_assign (lhs, cmp);
/* Preserve the source location when swapping in the new statement.  */
15033 gimple_set_location (g, gimple_location (stmt));
15034 gsi_replace (gsi, g, true);
15037 /* Helper function to map V2DF and V4SF types to their
15038 integral equivalents (V2DI and V4SI). */
15039 tree map_to_integral_tree_type (tree input_tree_type)
/* Integral vector types map to themselves unchanged.  */
15041 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
15042 return input_tree_type;
/* Otherwise only the two float vector types are expected here;
   anything else indicates a caller bug.  */
15045 if (types_compatible_p (TREE_TYPE (input_tree_type),
15046 TREE_TYPE (V2DF_type_node)))
15047 return V2DI_type_node;
15048 else if (types_compatible_p (TREE_TYPE (input_tree_type),
15049 TREE_TYPE (V4SF_type_node)))
15050 return V4SI_type_node;
15052 gcc_unreachable ();
15056 /* Helper function to handle the vector merge[hl] built-ins. The
15057 implementation difference between h and l versions for this code are in
15058 the values used when building of the permute vector for high word versus
15059 low word merge. The variance is keyed off the use_high parameter. */
15061 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
15063 tree arg0 = gimple_call_arg (stmt, 0);
15064 tree arg1 = gimple_call_arg (stmt, 1);
15065 tree lhs = gimple_call_lhs (stmt);
15066 tree lhs_type = TREE_TYPE (lhs);
15067 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
/* Only half of each input participates in a merge.  */
15068 int midpoint = n_elts / 2;
15074 /* The permute_type will match the lhs for integral types. For double and
15075 float types, the permute type needs to map to the V2 or V4 type that
   (continuation elided) has the same layout, via
   map_to_integral_tree_type.  */
15078 permute_type = map_to_integral_tree_type (lhs_type);
15079 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
/* Build the permute selector: each iteration pushes one index into
   arg0 and the matching index into arg1 (offset by n_elts in the
   concatenated arg0|arg1 index space).  NOTE(review): the starting
   offset, presumably derived from use_high, is set on a line elided
   from this view.  */
15081 for (int i = 0; i < midpoint; i++)
15083 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15085 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15086 offset + n_elts + i));
15089 tree permute = elts.build ();
/* Replace the builtin call with a VEC_PERM_EXPR on the constant
   selector, keeping the original statement's location.  */
15091 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15092 gimple_set_location (g, gimple_location (stmt));
15093 gsi_replace (gsi, g, true);
15096 /* Helper function to handle the vector merge[eo] built-ins.
   USE_ODD is 0 to merge the even-numbered elements of the two inputs
   and 1 to merge the odd-numbered elements.  */
15098 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
15100 tree arg0 = gimple_call_arg (stmt, 0);
15101 tree arg1 = gimple_call_arg (stmt, 1);
15102 tree lhs = gimple_call_lhs (stmt);
15103 tree lhs_type = TREE_TYPE (lhs);
15104 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15106 /* The permute_type will match the lhs for integral types. For double and
15107 float types, the permute type needs to map to the V2 or V4 type that
   (continuation elided) has the same layout, via
   map_to_integral_tree_type.  */
15110 permute_type = map_to_integral_tree_type (lhs_type);
15112 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
15114 /* Build the permute vector.  Indices 2*i + use_odd select the
   even (use_odd == 0) or odd (use_odd == 1) elements of arg0; adding
   n_elts selects the corresponding element of arg1 in the
   concatenated arg0|arg1 index space.  */
15115 for (int i = 0; i < n_elts / 2; i++)
15117 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15119 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15120 2*i + use_odd + n_elts));
15123 tree permute = elts.build ();
/* Replace the builtin call with a VEC_PERM_EXPR on the constant
   selector, keeping the original statement's location.  */
15125 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15126 gimple_set_location (g, gimple_location (stmt));
15127 gsi_replace (gsi, g, true);
15130 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15131 a constant, use rs6000_fold_builtin.) */
15134 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15136 gimple *stmt = gsi_stmt (*gsi);
15137 tree fndecl = gimple_call_fndecl (stmt);
15138 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15139 enum rs6000_builtins fn_code
15140 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15141 tree arg0, arg1, lhs, temp;
15142 enum tree_code bcode;
15145 size_t uns_fncode = (size_t) fn_code;
15146 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15147 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15148 const char *fn_name2 = (icode != CODE_FOR_nothing)
15149 ? get_insn_name ((int) icode)
15152 if (TARGET_DEBUG_BUILTIN)
15153 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15154 fn_code, fn_name1, fn_name2);
15156 if (!rs6000_fold_gimple)
15159 /* Prevent gimple folding for code that does not have a LHS, unless it is
15160 allowed per the rs6000_builtin_valid_without_lhs helper function. */
15161 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15164 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15165 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15166 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15172 /* Flavors of vec_add. We deliberately don't expand
15173 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15174 TImode, resulting in much poorer code generation. */
15175 case ALTIVEC_BUILTIN_VADDUBM:
15176 case ALTIVEC_BUILTIN_VADDUHM:
15177 case ALTIVEC_BUILTIN_VADDUWM:
15178 case P8V_BUILTIN_VADDUDM:
15179 case ALTIVEC_BUILTIN_VADDFP:
15180 case VSX_BUILTIN_XVADDDP:
15183 arg0 = gimple_call_arg (stmt, 0);
15184 arg1 = gimple_call_arg (stmt, 1);
15185 lhs = gimple_call_lhs (stmt);
15186 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15187 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15189 /* Ensure the binary operation is performed in a type
15190 that wraps if it is integral type. */
15191 gimple_seq stmts = NULL;
15192 tree type = unsigned_type_for (TREE_TYPE (lhs));
15193 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15195 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15197 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15198 type, uarg0, uarg1);
15199 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15200 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15201 build1 (VIEW_CONVERT_EXPR,
15202 TREE_TYPE (lhs), res));
15203 gsi_replace (gsi, g, true);
15206 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15207 gimple_set_location (g, gimple_location (stmt));
15208 gsi_replace (gsi, g, true);
15210 /* Flavors of vec_sub. We deliberately don't expand
15211 P8V_BUILTIN_VSUBUQM. */
15212 case ALTIVEC_BUILTIN_VSUBUBM:
15213 case ALTIVEC_BUILTIN_VSUBUHM:
15214 case ALTIVEC_BUILTIN_VSUBUWM:
15215 case P8V_BUILTIN_VSUBUDM:
15216 case ALTIVEC_BUILTIN_VSUBFP:
15217 case VSX_BUILTIN_XVSUBDP:
15218 bcode = MINUS_EXPR;
15220 case VSX_BUILTIN_XVMULSP:
15221 case VSX_BUILTIN_XVMULDP:
15222 arg0 = gimple_call_arg (stmt, 0);
15223 arg1 = gimple_call_arg (stmt, 1);
15224 lhs = gimple_call_lhs (stmt);
15225 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15226 gimple_set_location (g, gimple_location (stmt));
15227 gsi_replace (gsi, g, true);
15229 /* Even element flavors of vec_mul (signed). */
15230 case ALTIVEC_BUILTIN_VMULESB:
15231 case ALTIVEC_BUILTIN_VMULESH:
15232 case P8V_BUILTIN_VMULESW:
15233 /* Even element flavors of vec_mul (unsigned). */
15234 case ALTIVEC_BUILTIN_VMULEUB:
15235 case ALTIVEC_BUILTIN_VMULEUH:
15236 case P8V_BUILTIN_VMULEUW:
15237 arg0 = gimple_call_arg (stmt, 0);
15238 arg1 = gimple_call_arg (stmt, 1);
15239 lhs = gimple_call_lhs (stmt);
15240 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15241 gimple_set_location (g, gimple_location (stmt));
15242 gsi_replace (gsi, g, true);
15244 /* Odd element flavors of vec_mul (signed). */
15245 case ALTIVEC_BUILTIN_VMULOSB:
15246 case ALTIVEC_BUILTIN_VMULOSH:
15247 case P8V_BUILTIN_VMULOSW:
15248 /* Odd element flavors of vec_mul (unsigned). */
15249 case ALTIVEC_BUILTIN_VMULOUB:
15250 case ALTIVEC_BUILTIN_VMULOUH:
15251 case P8V_BUILTIN_VMULOUW:
15252 arg0 = gimple_call_arg (stmt, 0);
15253 arg1 = gimple_call_arg (stmt, 1);
15254 lhs = gimple_call_lhs (stmt);
15255 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15256 gimple_set_location (g, gimple_location (stmt));
15257 gsi_replace (gsi, g, true);
15259 /* Flavors of vec_div (Integer). */
15260 case VSX_BUILTIN_DIV_V2DI:
15261 case VSX_BUILTIN_UDIV_V2DI:
15262 arg0 = gimple_call_arg (stmt, 0);
15263 arg1 = gimple_call_arg (stmt, 1);
15264 lhs = gimple_call_lhs (stmt);
15265 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15266 gimple_set_location (g, gimple_location (stmt));
15267 gsi_replace (gsi, g, true);
15269 /* Flavors of vec_div (Float). */
15270 case VSX_BUILTIN_XVDIVSP:
15271 case VSX_BUILTIN_XVDIVDP:
15272 arg0 = gimple_call_arg (stmt, 0);
15273 arg1 = gimple_call_arg (stmt, 1);
15274 lhs = gimple_call_lhs (stmt);
15275 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15276 gimple_set_location (g, gimple_location (stmt));
15277 gsi_replace (gsi, g, true);
15279 /* Flavors of vec_and. */
15280 case ALTIVEC_BUILTIN_VAND:
15281 arg0 = gimple_call_arg (stmt, 0);
15282 arg1 = gimple_call_arg (stmt, 1);
15283 lhs = gimple_call_lhs (stmt);
15284 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15285 gimple_set_location (g, gimple_location (stmt));
15286 gsi_replace (gsi, g, true);
15288 /* Flavors of vec_andc. */
15289 case ALTIVEC_BUILTIN_VANDC:
15290 arg0 = gimple_call_arg (stmt, 0);
15291 arg1 = gimple_call_arg (stmt, 1);
15292 lhs = gimple_call_lhs (stmt);
15293 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15294 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15295 gimple_set_location (g, gimple_location (stmt));
15296 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15297 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15298 gimple_set_location (g, gimple_location (stmt));
15299 gsi_replace (gsi, g, true);
15301 /* Flavors of vec_nand. */
15302 case P8V_BUILTIN_VEC_NAND:
15303 case P8V_BUILTIN_NAND_V16QI:
15304 case P8V_BUILTIN_NAND_V8HI:
15305 case P8V_BUILTIN_NAND_V4SI:
15306 case P8V_BUILTIN_NAND_V4SF:
15307 case P8V_BUILTIN_NAND_V2DF:
15308 case P8V_BUILTIN_NAND_V2DI:
15309 arg0 = gimple_call_arg (stmt, 0);
15310 arg1 = gimple_call_arg (stmt, 1);
15311 lhs = gimple_call_lhs (stmt);
15312 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15313 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15314 gimple_set_location (g, gimple_location (stmt));
15315 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15316 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15317 gimple_set_location (g, gimple_location (stmt));
15318 gsi_replace (gsi, g, true);
15320 /* Flavors of vec_or. */
15321 case ALTIVEC_BUILTIN_VOR:
15322 arg0 = gimple_call_arg (stmt, 0);
15323 arg1 = gimple_call_arg (stmt, 1);
15324 lhs = gimple_call_lhs (stmt);
15325 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15326 gimple_set_location (g, gimple_location (stmt));
15327 gsi_replace (gsi, g, true);
15329 /* flavors of vec_orc. */
15330 case P8V_BUILTIN_ORC_V16QI:
15331 case P8V_BUILTIN_ORC_V8HI:
15332 case P8V_BUILTIN_ORC_V4SI:
15333 case P8V_BUILTIN_ORC_V4SF:
15334 case P8V_BUILTIN_ORC_V2DF:
15335 case P8V_BUILTIN_ORC_V2DI:
15336 arg0 = gimple_call_arg (stmt, 0);
15337 arg1 = gimple_call_arg (stmt, 1);
15338 lhs = gimple_call_lhs (stmt);
15339 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15340 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15341 gimple_set_location (g, gimple_location (stmt));
15342 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15343 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15344 gimple_set_location (g, gimple_location (stmt));
15345 gsi_replace (gsi, g, true);
15347 /* Flavors of vec_xor. */
15348 case ALTIVEC_BUILTIN_VXOR:
15349 arg0 = gimple_call_arg (stmt, 0);
15350 arg1 = gimple_call_arg (stmt, 1);
15351 lhs = gimple_call_lhs (stmt);
15352 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15353 gimple_set_location (g, gimple_location (stmt));
15354 gsi_replace (gsi, g, true);
15356 /* Flavors of vec_nor. */
15357 case ALTIVEC_BUILTIN_VNOR:
15358 arg0 = gimple_call_arg (stmt, 0);
15359 arg1 = gimple_call_arg (stmt, 1);
15360 lhs = gimple_call_lhs (stmt);
15361 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15362 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15363 gimple_set_location (g, gimple_location (stmt));
15364 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15365 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15366 gimple_set_location (g, gimple_location (stmt));
15367 gsi_replace (gsi, g, true);
15369 /* flavors of vec_abs. */
15370 case ALTIVEC_BUILTIN_ABS_V16QI:
15371 case ALTIVEC_BUILTIN_ABS_V8HI:
15372 case ALTIVEC_BUILTIN_ABS_V4SI:
15373 case ALTIVEC_BUILTIN_ABS_V4SF:
15374 case P8V_BUILTIN_ABS_V2DI:
15375 case VSX_BUILTIN_XVABSDP:
15376 arg0 = gimple_call_arg (stmt, 0);
15377 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15378 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15380 lhs = gimple_call_lhs (stmt);
15381 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15382 gimple_set_location (g, gimple_location (stmt));
15383 gsi_replace (gsi, g, true);
15385 /* flavors of vec_min. */
15386 case VSX_BUILTIN_XVMINDP:
15387 case P8V_BUILTIN_VMINSD:
15388 case P8V_BUILTIN_VMINUD:
15389 case ALTIVEC_BUILTIN_VMINSB:
15390 case ALTIVEC_BUILTIN_VMINSH:
15391 case ALTIVEC_BUILTIN_VMINSW:
15392 case ALTIVEC_BUILTIN_VMINUB:
15393 case ALTIVEC_BUILTIN_VMINUH:
15394 case ALTIVEC_BUILTIN_VMINUW:
15395 case ALTIVEC_BUILTIN_VMINFP:
15396 arg0 = gimple_call_arg (stmt, 0);
15397 arg1 = gimple_call_arg (stmt, 1);
15398 lhs = gimple_call_lhs (stmt);
15399 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15400 gimple_set_location (g, gimple_location (stmt));
15401 gsi_replace (gsi, g, true);
15403 /* flavors of vec_max. */
15404 case VSX_BUILTIN_XVMAXDP:
15405 case P8V_BUILTIN_VMAXSD:
15406 case P8V_BUILTIN_VMAXUD:
15407 case ALTIVEC_BUILTIN_VMAXSB:
15408 case ALTIVEC_BUILTIN_VMAXSH:
15409 case ALTIVEC_BUILTIN_VMAXSW:
15410 case ALTIVEC_BUILTIN_VMAXUB:
15411 case ALTIVEC_BUILTIN_VMAXUH:
15412 case ALTIVEC_BUILTIN_VMAXUW:
15413 case ALTIVEC_BUILTIN_VMAXFP:
15414 arg0 = gimple_call_arg (stmt, 0);
15415 arg1 = gimple_call_arg (stmt, 1);
15416 lhs = gimple_call_lhs (stmt);
15417 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15418 gimple_set_location (g, gimple_location (stmt));
15419 gsi_replace (gsi, g, true);
15421 /* Flavors of vec_eqv. */
15422 case P8V_BUILTIN_EQV_V16QI:
15423 case P8V_BUILTIN_EQV_V8HI:
15424 case P8V_BUILTIN_EQV_V4SI:
15425 case P8V_BUILTIN_EQV_V4SF:
15426 case P8V_BUILTIN_EQV_V2DF:
15427 case P8V_BUILTIN_EQV_V2DI:
15428 arg0 = gimple_call_arg (stmt, 0);
15429 arg1 = gimple_call_arg (stmt, 1);
15430 lhs = gimple_call_lhs (stmt);
15431 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15432 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15433 gimple_set_location (g, gimple_location (stmt));
15434 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15435 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15436 gimple_set_location (g, gimple_location (stmt));
15437 gsi_replace (gsi, g, true);
15439 /* Flavors of vec_rotate_left. */
15440 case ALTIVEC_BUILTIN_VRLB:
15441 case ALTIVEC_BUILTIN_VRLH:
15442 case ALTIVEC_BUILTIN_VRLW:
15443 case P8V_BUILTIN_VRLD:
15444 arg0 = gimple_call_arg (stmt, 0);
15445 arg1 = gimple_call_arg (stmt, 1);
15446 lhs = gimple_call_lhs (stmt);
15447 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15448 gimple_set_location (g, gimple_location (stmt));
15449 gsi_replace (gsi, g, true);
15451 /* Flavors of vector shift right algebraic.
15452 vec_sra{b,h,w} -> vsra{b,h,w}. */
15453 case ALTIVEC_BUILTIN_VSRAB:
15454 case ALTIVEC_BUILTIN_VSRAH:
15455 case ALTIVEC_BUILTIN_VSRAW:
15456 case P8V_BUILTIN_VSRAD:
15458 arg0 = gimple_call_arg (stmt, 0);
15459 arg1 = gimple_call_arg (stmt, 1);
15460 lhs = gimple_call_lhs (stmt);
15461 tree arg1_type = TREE_TYPE (arg1);
15462 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15463 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15464 location_t loc = gimple_location (stmt);
15465 /* Force arg1 into the range valid matching the arg0 type. */
15466 /* Build a vector consisting of the max valid bit-size values. */
15467 int n_elts = VECTOR_CST_NELTS (arg1);
15468 tree element_size = build_int_cst (unsigned_element_type,
15470 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15471 for (int i = 0; i < n_elts; i++)
15472 elts.safe_push (element_size);
15473 tree modulo_tree = elts.build ();
15474 /* Modulo the provided shift value against that vector. */
15475 gimple_seq stmts = NULL;
15476 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15477 unsigned_arg1_type, arg1);
15478 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15479 unsigned_arg1_type, unsigned_arg1,
15481 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15482 /* And finally, do the shift. */
15483 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
15484 gimple_set_location (g, loc);
15485 gsi_replace (gsi, g, true);
15488 /* Flavors of vector shift left.
15489 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15490 case ALTIVEC_BUILTIN_VSLB:
15491 case ALTIVEC_BUILTIN_VSLH:
15492 case ALTIVEC_BUILTIN_VSLW:
15493 case P8V_BUILTIN_VSLD:
15496 gimple_seq stmts = NULL;
15497 arg0 = gimple_call_arg (stmt, 0);
15498 tree arg0_type = TREE_TYPE (arg0);
15499 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15500 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15502 arg1 = gimple_call_arg (stmt, 1);
15503 tree arg1_type = TREE_TYPE (arg1);
15504 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15505 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15506 loc = gimple_location (stmt);
15507 lhs = gimple_call_lhs (stmt);
15508 /* Force arg1 into the range valid matching the arg0 type. */
15509 /* Build a vector consisting of the max valid bit-size values. */
15510 int n_elts = VECTOR_CST_NELTS (arg1);
15511 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15513 tree element_size = build_int_cst (unsigned_element_type,
15514 tree_size_in_bits / n_elts);
15515 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15516 for (int i = 0; i < n_elts; i++)
15517 elts.safe_push (element_size);
15518 tree modulo_tree = elts.build ();
15519 /* Modulo the provided shift value against that vector. */
15520 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15521 unsigned_arg1_type, arg1);
15522 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15523 unsigned_arg1_type, unsigned_arg1,
15525 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15526 /* And finally, do the shift. */
15527 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15528 gimple_set_location (g, gimple_location (stmt));
15529 gsi_replace (gsi, g, true);
15532 /* Flavors of vector shift right. */
15533 case ALTIVEC_BUILTIN_VSRB:
15534 case ALTIVEC_BUILTIN_VSRH:
15535 case ALTIVEC_BUILTIN_VSRW:
15536 case P8V_BUILTIN_VSRD:
15538 arg0 = gimple_call_arg (stmt, 0);
15539 arg1 = gimple_call_arg (stmt, 1);
15540 lhs = gimple_call_lhs (stmt);
15541 tree arg1_type = TREE_TYPE (arg1);
15542 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15543 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15544 location_t loc = gimple_location (stmt);
15545 gimple_seq stmts = NULL;
15546 /* Convert arg0 to unsigned. */
15548 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15549 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15550 /* Force arg1 into the range valid matching the arg0 type. */
15551 /* Build a vector consisting of the max valid bit-size values. */
15552 int n_elts = VECTOR_CST_NELTS (arg1);
15553 tree element_size = build_int_cst (unsigned_element_type,
15555 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15556 for (int i = 0; i < n_elts; i++)
15557 elts.safe_push (element_size);
15558 tree modulo_tree = elts.build ();
15559 /* Modulo the provided shift value against that vector. */
15560 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15561 unsigned_arg1_type, arg1);
15562 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15563 unsigned_arg1_type, unsigned_arg1,
15565 /* Do the shift. */
15567 = gimple_build (&stmts, RSHIFT_EXPR,
15568 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
15569 /* Convert result back to the lhs type. */
15570 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15571 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15572 update_call_from_tree (gsi, res);
15575 /* Vector loads. */
15576 case ALTIVEC_BUILTIN_LVX_V16QI:
15577 case ALTIVEC_BUILTIN_LVX_V8HI:
15578 case ALTIVEC_BUILTIN_LVX_V4SI:
15579 case ALTIVEC_BUILTIN_LVX_V4SF:
15580 case ALTIVEC_BUILTIN_LVX_V2DI:
15581 case ALTIVEC_BUILTIN_LVX_V2DF:
15582 case ALTIVEC_BUILTIN_LVX_V1TI:
15584 arg0 = gimple_call_arg (stmt, 0); // offset
15585 arg1 = gimple_call_arg (stmt, 1); // address
15586 lhs = gimple_call_lhs (stmt);
15587 location_t loc = gimple_location (stmt);
15588 /* Since arg1 may be cast to a different type, just use ptr_type_node
15589 here instead of trying to enforce TBAA on pointer types. */
15590 tree arg1_type = ptr_type_node;
15591 tree lhs_type = TREE_TYPE (lhs);
15592 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15593 the tree using the value from arg0. The resulting type will match
15594 the type of arg1. */
15595 gimple_seq stmts = NULL;
15596 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15597 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15598 arg1_type, arg1, temp_offset);
15599 /* Mask off any lower bits from the address. */
15600 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15601 arg1_type, temp_addr,
15602 build_int_cst (arg1_type, -16));
15603 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15604 if (!is_gimple_mem_ref_addr (aligned_addr))
15606 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15607 gimple *g = gimple_build_assign (t, aligned_addr);
15608 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15611 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15612 take an offset, but since we've already incorporated the offset
15613 above, here we just pass in a zero. */
15615 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15616 build_int_cst (arg1_type, 0)));
15617 gimple_set_location (g, loc);
15618 gsi_replace (gsi, g, true);
15621 /* Vector stores. */
15622 case ALTIVEC_BUILTIN_STVX_V16QI:
15623 case ALTIVEC_BUILTIN_STVX_V8HI:
15624 case ALTIVEC_BUILTIN_STVX_V4SI:
15625 case ALTIVEC_BUILTIN_STVX_V4SF:
15626 case ALTIVEC_BUILTIN_STVX_V2DI:
15627 case ALTIVEC_BUILTIN_STVX_V2DF:
15629 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15630 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15631 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15632 location_t loc = gimple_location (stmt);
15633 tree arg0_type = TREE_TYPE (arg0);
15634 /* Use ptr_type_node (no TBAA) for the arg2_type.
15635 FIXME: (Richard) "A proper fix would be to transition this type as
15636 seen from the frontend to GIMPLE, for example in a similar way we
15637 do for MEM_REFs by piggy-backing that on an extra argument, a
15638 constant zero pointer of the alias pointer type to use (which would
15639 also serve as a type indicator of the store itself). I'd use a
15640 target specific internal function for this (not sure if we can have
15641 those target specific, but I guess if it's folded away then that's
15642 fine) and get away with the overload set." */
15643 tree arg2_type = ptr_type_node;
15644 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15645 the tree using the value from arg0. The resulting type will match
15646 the type of arg2. */
15647 gimple_seq stmts = NULL;
15648 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15649 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15650 arg2_type, arg2, temp_offset);
15651 /* Mask off any lower bits from the address. */
15652 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15653 arg2_type, temp_addr,
15654 build_int_cst (arg2_type, -16));
15655 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15656 if (!is_gimple_mem_ref_addr (aligned_addr))
15658 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15659 gimple *g = gimple_build_assign (t, aligned_addr);
15660 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15663 /* The desired gimple result should be similar to:
15664 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15666 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15667 build_int_cst (arg2_type, 0)), arg0);
15668 gimple_set_location (g, loc);
15669 gsi_replace (gsi, g, true);
15673 /* unaligned Vector loads. */
15674 case VSX_BUILTIN_LXVW4X_V16QI:
15675 case VSX_BUILTIN_LXVW4X_V8HI:
15676 case VSX_BUILTIN_LXVW4X_V4SF:
15677 case VSX_BUILTIN_LXVW4X_V4SI:
15678 case VSX_BUILTIN_LXVD2X_V2DF:
15679 case VSX_BUILTIN_LXVD2X_V2DI:
15681 arg0 = gimple_call_arg (stmt, 0); // offset
15682 arg1 = gimple_call_arg (stmt, 1); // address
15683 lhs = gimple_call_lhs (stmt);
15684 location_t loc = gimple_location (stmt);
15685 /* Since arg1 may be cast to a different type, just use ptr_type_node
15686 here instead of trying to enforce TBAA on pointer types. */
15687 tree arg1_type = ptr_type_node;
15688 tree lhs_type = TREE_TYPE (lhs);
15689 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15690 required alignment (power) is 4 bytes regardless of data type. */
15691 tree align_ltype = build_aligned_type (lhs_type, 4);
15692 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15693 the tree using the value from arg0. The resulting type will match
15694 the type of arg1. */
15695 gimple_seq stmts = NULL;
15696 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15697 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15698 arg1_type, arg1, temp_offset);
15699 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15700 if (!is_gimple_mem_ref_addr (temp_addr))
15702 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15703 gimple *g = gimple_build_assign (t, temp_addr);
15704 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15707 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15708 take an offset, but since we've already incorporated the offset
15709 above, here we just pass in a zero. */
15711 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15712 build_int_cst (arg1_type, 0)));
15713 gimple_set_location (g, loc);
15714 gsi_replace (gsi, g, true);
15718 /* unaligned Vector stores. */
15719 case VSX_BUILTIN_STXVW4X_V16QI:
15720 case VSX_BUILTIN_STXVW4X_V8HI:
15721 case VSX_BUILTIN_STXVW4X_V4SF:
15722 case VSX_BUILTIN_STXVW4X_V4SI:
15723 case VSX_BUILTIN_STXVD2X_V2DF:
15724 case VSX_BUILTIN_STXVD2X_V2DI:
15726 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15727 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15728 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15729 location_t loc = gimple_location (stmt);
15730 tree arg0_type = TREE_TYPE (arg0);
15731 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15732 tree arg2_type = ptr_type_node;
15733 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15734 required alignment (power) is 4 bytes regardless of data type. */
15735 tree align_stype = build_aligned_type (arg0_type, 4);
15736 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15737 the tree using the value from arg1. */
15738 gimple_seq stmts = NULL;
15739 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15740 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15741 arg2_type, arg2, temp_offset);
15742 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15743 if (!is_gimple_mem_ref_addr (temp_addr))
15745 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15746 gimple *g = gimple_build_assign (t, temp_addr);
15747 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15751 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15752 build_int_cst (arg2_type, 0)), arg0);
15753 gimple_set_location (g, loc);
15754 gsi_replace (gsi, g, true);
15758 /* Vector Fused multiply-add (fma). */
15759 case ALTIVEC_BUILTIN_VMADDFP:
15760 case VSX_BUILTIN_XVMADDDP:
15761 case ALTIVEC_BUILTIN_VMLADDUHM:
15763 arg0 = gimple_call_arg (stmt, 0);
15764 arg1 = gimple_call_arg (stmt, 1);
15765 tree arg2 = gimple_call_arg (stmt, 2);
15766 lhs = gimple_call_lhs (stmt);
15767 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15768 gimple_call_set_lhs (g, lhs);
15769 gimple_call_set_nothrow (g, true);
15770 gimple_set_location (g, gimple_location (stmt));
15771 gsi_replace (gsi, g, true);
15775 /* Vector compares; EQ, NE, GE, GT, LE. */
15776 case ALTIVEC_BUILTIN_VCMPEQUB:
15777 case ALTIVEC_BUILTIN_VCMPEQUH:
15778 case ALTIVEC_BUILTIN_VCMPEQUW:
15779 case P8V_BUILTIN_VCMPEQUD:
15780 fold_compare_helper (gsi, EQ_EXPR, stmt);
15783 case P9V_BUILTIN_CMPNEB:
15784 case P9V_BUILTIN_CMPNEH:
15785 case P9V_BUILTIN_CMPNEW:
15786 fold_compare_helper (gsi, NE_EXPR, stmt);
15789 case VSX_BUILTIN_CMPGE_16QI:
15790 case VSX_BUILTIN_CMPGE_U16QI:
15791 case VSX_BUILTIN_CMPGE_8HI:
15792 case VSX_BUILTIN_CMPGE_U8HI:
15793 case VSX_BUILTIN_CMPGE_4SI:
15794 case VSX_BUILTIN_CMPGE_U4SI:
15795 case VSX_BUILTIN_CMPGE_2DI:
15796 case VSX_BUILTIN_CMPGE_U2DI:
15797 fold_compare_helper (gsi, GE_EXPR, stmt);
15800 case ALTIVEC_BUILTIN_VCMPGTSB:
15801 case ALTIVEC_BUILTIN_VCMPGTUB:
15802 case ALTIVEC_BUILTIN_VCMPGTSH:
15803 case ALTIVEC_BUILTIN_VCMPGTUH:
15804 case ALTIVEC_BUILTIN_VCMPGTSW:
15805 case ALTIVEC_BUILTIN_VCMPGTUW:
15806 case P8V_BUILTIN_VCMPGTUD:
15807 case P8V_BUILTIN_VCMPGTSD:
15808 fold_compare_helper (gsi, GT_EXPR, stmt);
15811 case VSX_BUILTIN_CMPLE_16QI:
15812 case VSX_BUILTIN_CMPLE_U16QI:
15813 case VSX_BUILTIN_CMPLE_8HI:
15814 case VSX_BUILTIN_CMPLE_U8HI:
15815 case VSX_BUILTIN_CMPLE_4SI:
15816 case VSX_BUILTIN_CMPLE_U4SI:
15817 case VSX_BUILTIN_CMPLE_2DI:
15818 case VSX_BUILTIN_CMPLE_U2DI:
15819 fold_compare_helper (gsi, LE_EXPR, stmt);
15822 /* flavors of vec_splat_[us]{8,16,32}. */
15823 case ALTIVEC_BUILTIN_VSPLTISB:
15824 case ALTIVEC_BUILTIN_VSPLTISH:
15825 case ALTIVEC_BUILTIN_VSPLTISW:
15827 arg0 = gimple_call_arg (stmt, 0);
15828 lhs = gimple_call_lhs (stmt);
15830 /* Only fold the vec_splat_*() if the lower bits of arg 0 is a
15831 5-bit signed constant in range -16 to +15. */
15832 if (TREE_CODE (arg0) != INTEGER_CST
15833 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
15835 gimple_seq stmts = NULL;
15836 location_t loc = gimple_location (stmt);
15837 tree splat_value = gimple_convert (&stmts, loc,
15838 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15839 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15840 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15841 g = gimple_build_assign (lhs, splat_tree);
15842 gimple_set_location (g, gimple_location (stmt));
15843 gsi_replace (gsi, g, true);
15847 /* Flavors of vec_splat. */
15848 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
15849 case ALTIVEC_BUILTIN_VSPLTB:
15850 case ALTIVEC_BUILTIN_VSPLTH:
15851 case ALTIVEC_BUILTIN_VSPLTW:
15852 case VSX_BUILTIN_XXSPLTD_V2DI:
15853 case VSX_BUILTIN_XXSPLTD_V2DF:
15855 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
15856 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
15857 /* Only fold the vec_splat_*() if arg1 is both a constant value and
15858 is a valid index into the arg0 vector. */
15859 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
15860 if (TREE_CODE (arg1) != INTEGER_CST
15861 || TREE_INT_CST_LOW (arg1) > (n_elts -1))
15863 lhs = gimple_call_lhs (stmt);
15864 tree lhs_type = TREE_TYPE (lhs);
15865 tree arg0_type = TREE_TYPE (arg0);
15867 if (TREE_CODE (arg0) == VECTOR_CST)
15868 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
15871 /* Determine (in bits) the length and start location of the
15872 splat value for a call to the tree_vec_extract helper. */
15873 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
15874 * BITS_PER_UNIT / n_elts;
15875 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
15876 tree len = build_int_cst (bitsizetype, splat_elem_size);
15877 tree start = build_int_cst (bitsizetype, splat_start_bit);
15878 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
15881 /* And finally, build the new vector. */
15882 tree splat_tree = build_vector_from_val (lhs_type, splat);
15883 g = gimple_build_assign (lhs, splat_tree);
15884 gimple_set_location (g, gimple_location (stmt));
15885 gsi_replace (gsi, g, true);
15889 /* vec_mergel (integrals). */
15890 case ALTIVEC_BUILTIN_VMRGLH:
15891 case ALTIVEC_BUILTIN_VMRGLW:
15892 case VSX_BUILTIN_XXMRGLW_4SI:
15893 case ALTIVEC_BUILTIN_VMRGLB:
15894 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15895 case VSX_BUILTIN_XXMRGLW_4SF:
15896 case VSX_BUILTIN_VEC_MERGEL_V2DF:
15897 fold_mergehl_helper (gsi, stmt, 1);
15899 /* vec_mergeh (integrals). */
15900 case ALTIVEC_BUILTIN_VMRGHH:
15901 case ALTIVEC_BUILTIN_VMRGHW:
15902 case VSX_BUILTIN_XXMRGHW_4SI:
15903 case ALTIVEC_BUILTIN_VMRGHB:
15904 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15905 case VSX_BUILTIN_XXMRGHW_4SF:
15906 case VSX_BUILTIN_VEC_MERGEH_V2DF:
15907 fold_mergehl_helper (gsi, stmt, 0);
15910 /* Flavors of vec_mergee. */
15911 case P8V_BUILTIN_VMRGEW_V4SI:
15912 case P8V_BUILTIN_VMRGEW_V2DI:
15913 case P8V_BUILTIN_VMRGEW_V4SF:
15914 case P8V_BUILTIN_VMRGEW_V2DF:
15915 fold_mergeeo_helper (gsi, stmt, 0);
15917 /* Flavors of vec_mergeo. */
15918 case P8V_BUILTIN_VMRGOW_V4SI:
15919 case P8V_BUILTIN_VMRGOW_V2DI:
15920 case P8V_BUILTIN_VMRGOW_V4SF:
15921 case P8V_BUILTIN_VMRGOW_V2DF:
15922 fold_mergeeo_helper (gsi, stmt, 1);
15925 /* d = vec_pack (a, b) */
15926 case P8V_BUILTIN_VPKUDUM:
15927 case ALTIVEC_BUILTIN_VPKUHUM:
15928 case ALTIVEC_BUILTIN_VPKUWUM:
15930 arg0 = gimple_call_arg (stmt, 0);
15931 arg1 = gimple_call_arg (stmt, 1);
15932 lhs = gimple_call_lhs (stmt);
15933 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
15934 gimple_set_location (g, gimple_location (stmt));
15935 gsi_replace (gsi, g, true);
15939 /* d = vec_unpackh (a) */
15940 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
15941 in this code is sensitive to endian-ness, and needs to be inverted to
15942 handle both LE and BE targets. */
15943 case ALTIVEC_BUILTIN_VUPKHSB:
15944 case ALTIVEC_BUILTIN_VUPKHSH:
15945 case P8V_BUILTIN_VUPKHSW:
15947 arg0 = gimple_call_arg (stmt, 0);
15948 lhs = gimple_call_lhs (stmt);
15949 if (BYTES_BIG_ENDIAN)
15950 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15952 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15953 gimple_set_location (g, gimple_location (stmt));
15954 gsi_replace (gsi, g, true);
15957 /* d = vec_unpackl (a) */
15958 case ALTIVEC_BUILTIN_VUPKLSB:
15959 case ALTIVEC_BUILTIN_VUPKLSH:
15960 case P8V_BUILTIN_VUPKLSW:
15962 arg0 = gimple_call_arg (stmt, 0);
15963 lhs = gimple_call_lhs (stmt);
15964 if (BYTES_BIG_ENDIAN)
15965 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15967 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15968 gimple_set_location (g, gimple_location (stmt));
15969 gsi_replace (gsi, g, true);
15972 /* There is no gimple type corresponding with pixel, so just return. */
15973 case ALTIVEC_BUILTIN_VUPKHPX:
15974 case ALTIVEC_BUILTIN_VUPKLPX:
15978 case ALTIVEC_BUILTIN_VPERM_16QI:
15979 case ALTIVEC_BUILTIN_VPERM_8HI:
15980 case ALTIVEC_BUILTIN_VPERM_4SI:
15981 case ALTIVEC_BUILTIN_VPERM_2DI:
15982 case ALTIVEC_BUILTIN_VPERM_4SF:
15983 case ALTIVEC_BUILTIN_VPERM_2DF:
15985 arg0 = gimple_call_arg (stmt, 0);
15986 arg1 = gimple_call_arg (stmt, 1);
15987 tree permute = gimple_call_arg (stmt, 2);
15988 lhs = gimple_call_lhs (stmt);
15989 location_t loc = gimple_location (stmt);
15990 gimple_seq stmts = NULL;
15991 // convert arg0 and arg1 to match the type of the permute
15992 // for the VEC_PERM_EXPR operation.
15993 tree permute_type = (TREE_TYPE (permute));
15994 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
15995 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
15996 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
15997 permute_type, arg0_ptype, arg1_ptype,
15999 // Convert the result back to the desired lhs type upon completion.
16000 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
16001 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16002 g = gimple_build_assign (lhs, temp);
16003 gimple_set_location (g, loc);
16004 gsi_replace (gsi, g, true);
16009 if (TARGET_DEBUG_BUILTIN)
16010 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
16011 fn_code, fn_name1, fn_name2);
16018 /* Expand an expression EXP that calls a built-in function,
16019 with result going to TARGET if that's convenient
16020 (and in mode MODE if that's convenient).
16021 SUBTARGET may be used as the target for computing one of EXP's operands.
16022 IGNORE is nonzero if the value is to be ignored. */
16025 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16026 machine_mode mode ATTRIBUTE_UNUSED,
16027 int ignore ATTRIBUTE_UNUSED)
16029 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16030 enum rs6000_builtins fcode
16031 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16032 size_t uns_fcode = (size_t)fcode;
16033 const struct builtin_description *d;
/* Look up this builtin's enable mask, check it against the current
   rs6000_builtin_mask, and fetch the insn code from the info table.  */
16037 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16038 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16039 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16041 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
16042 floating point type, depending on whether long double is the IBM extended
16043 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
16044 we only define one variant of the built-in function, and switch the code
16045 when defining it, rather than defining two built-ins and using the
16046 overload table in rs6000-c.c to switch between the two. If we don't have
16047 the proper assembler, don't do this switch because CODE_FOR_*kf* and
16048 CODE_FOR_*tf* will be CODE_FOR_nothing. */
16049 if (FLOAT128_IEEE_P (TFmode))
/* Remap each KFmode insn code to its TFmode twin when long double is
   IEEE 128-bit.  (The switch header for this mapping is not visible in
   this listing.)  */
16055 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16056 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16057 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16058 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16059 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16060 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16061 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16062 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16063 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16064 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16065 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16066 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16067 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
/* Debug dump of builtin name, insn name, and operand-arity class.  */
16070 if (TARGET_DEBUG_BUILTIN)
16072 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16073 const char *name2 = (icode != CODE_FOR_nothing)
16074 ? get_insn_name ((int) icode)
16078 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16080 default: name3 = "unknown"; break;
16081 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16082 case RS6000_BTC_UNARY: name3 = "unary"; break;
16083 case RS6000_BTC_BINARY: name3 = "binary"; break;
16084 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16085 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16086 case RS6000_BTC_ABS: name3 = "abs"; break;
16087 case RS6000_BTC_DST: name3 = "dst"; break;
16092 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16093 (name1) ? name1 : "---", fcode,
16094 (name2) ? name2 : "---", (int) icode,
16096 func_valid_p ? "" : ", not valid");
/* NOTE(review): the guard for this path is elided in this listing;
   presumably reached when !func_valid_p — diagnose the unsupported
   builtin and fall back to an ordinary call.  */
16101 rs6000_invalid_builtin (fcode);
16103 /* Given it is invalid, just generate a normal call. */
16104 return expand_call (exp, target, ignore);
/* Builtins needing special (non-table-driven) expansion.  The enclosing
   switch (fcode) header is elided in this listing.  */
16109 case RS6000_BUILTIN_RECIP:
16110 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16112 case RS6000_BUILTIN_RECIPF:
16113 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16115 case RS6000_BUILTIN_RSQRTF:
16116 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16118 case RS6000_BUILTIN_RSQRT:
16119 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16121 case POWER7_BUILTIN_BPERMD:
16122 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16123 ? CODE_FOR_bpermd_di
16124 : CODE_FOR_bpermd_si), exp, target);
16126 case RS6000_BUILTIN_GET_TB:
16127 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16130 case RS6000_BUILTIN_MFTB:
16131 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16132 ? CODE_FOR_rs6000_mftb_di
16133 : CODE_FOR_rs6000_mftb_si),
16136 case RS6000_BUILTIN_MFFS:
16137 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16139 case RS6000_BUILTIN_MTFSB0:
16140 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
16142 case RS6000_BUILTIN_MTFSB1:
16143 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
16145 case RS6000_BUILTIN_SET_FPSCR_RN:
16146 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
16149 case RS6000_BUILTIN_SET_FPSCR_DRN:
16151 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
16154 case RS6000_BUILTIN_MFFSL:
16155 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
16157 case RS6000_BUILTIN_MTFSF:
16158 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16160 case RS6000_BUILTIN_CPU_INIT:
16161 case RS6000_BUILTIN_CPU_IS:
16162 case RS6000_BUILTIN_CPU_SUPPORTS:
16163 return cpu_expand_builtin (fcode, exp, target);
16165 case MISC_BUILTIN_SPEC_BARRIER:
16167 emit_insn (gen_speculation_barrier ());
/* __builtin_altivec_mask_for_{load,store}: compute the lvsl/lvsr
   permute mask for an unaligned address (direction depends on
   endianness).  */
16171 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16172 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16174 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16175 : (int) CODE_FOR_altivec_lvsl_direct);
16176 machine_mode tmode = insn_data[icode2].operand[0].mode;
16177 machine_mode mode = insn_data[icode2].operand[1].mode;
16181 gcc_assert (TARGET_ALTIVEC);
16183 arg = CALL_EXPR_ARG (exp, 0);
16184 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16185 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16186 addr = memory_address (mode, op);
16187 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16191 /* For the load case need to negate the address. */
16192 op = gen_reg_rtx (GET_MODE (addr));
16193 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16195 op = gen_rtx_MEM (mode, op);
/* Make sure TARGET is a register of the right mode for the insn;
   (the 'if (target == 0' opening line is elided in this listing).  */
16198 || GET_MODE (target) != tmode
16199 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16200 target = gen_reg_rtx (tmode);
16202 pat = GEN_FCN (icode2) (target, op);
16210 case ALTIVEC_BUILTIN_VCFUX:
16211 case ALTIVEC_BUILTIN_VCFSX:
16212 case ALTIVEC_BUILTIN_VCTUXS:
16213 case ALTIVEC_BUILTIN_VCTSXS:
16214 /* FIXME: There's got to be a nicer way to handle this case than
16215 constructing a new CALL_EXPR. */
16216 if (call_expr_nargs (exp) == 1)
/* One-argument form: rebuild the call with an explicit zero for the
   omitted scale operand.  */
16218 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16219 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16223 /* For the pack and unpack int128 routines, fix up the builtin so it
16224 uses the correct IBM128 type. */
16225 case MISC_BUILTIN_PACK_IF:
16226 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16228 icode = CODE_FOR_packtf;
16229 fcode = MISC_BUILTIN_PACK_TF;
16230 uns_fcode = (size_t)fcode;
16234 case MISC_BUILTIN_UNPACK_IF:
16235 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16237 icode = CODE_FOR_unpacktf;
16238 fcode = MISC_BUILTIN_UNPACK_TF;
16239 uns_fcode = (size_t)fcode;
/* Try the per-family expanders next; each sets a success flag and we
   return its result on success (the flag checks are elided in this
   listing).  */
16247 if (TARGET_ALTIVEC)
16249 ret = altivec_expand_builtin (exp, target, &success);
16256 ret = htm_expand_builtin (exp, target, &success);
/* Fall through to the generic table-driven expansion: the builtin must
   be a simple 0/1/2/3-operand operator.  */
16262 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16263 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16264 gcc_assert (attr == RS6000_BTC_UNARY
16265 || attr == RS6000_BTC_BINARY
16266 || attr == RS6000_BTC_TERNARY
16267 || attr == RS6000_BTC_SPECIAL);
16269 /* Handle simple unary operations. */
16271 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16272 if (d->code == fcode)
16273 return rs6000_expand_unop_builtin (icode, exp, target);
16275 /* Handle simple binary operations. */
16277 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16278 if (d->code == fcode)
16279 return rs6000_expand_binop_builtin (icode, exp, target);
16281 /* Handle simple ternary operations. */
16283 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16284 if (d->code == fcode)
16285 return rs6000_expand_ternop_builtin (icode, exp, target);
16287 /* Handle simple no-argument operations. */
16289 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16290 if (d->code == fcode)
16291 return rs6000_expand_zeroop_builtin (icode, target);
/* Every builtin that reaches this point must have matched one of the
   tables above.  */
16293 gcc_unreachable ();
16296 /* Create a builtin vector type with a name. Taking care not to give
16297 the canonical type a name. */
/* NAME is the user-visible type name to register (e.g. "__vector float"),
   ELT_TYPE the element type, and NUM_ELTS the number of lanes.
   NOTE(review): the return-type line and trailing `return result;` are
   elided in this listing; the function presumably returns the named
   variant type — confirm against the full source.  */
16300 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16302 tree result = build_vector_type (elt_type, num_elts);
16304 /* Copy so we don't give the canonical type a name. */
16305 result = build_variant_type_copy (result);
/* Register the variant under NAME so the front ends can see it.  */
16307 add_builtin_type (name, result);
16313 rs6000_init_builtins (void)
16319 if (TARGET_DEBUG_BUILTIN)
16320 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16321 (TARGET_ALTIVEC) ? ", altivec" : "",
16322 (TARGET_VSX) ? ", vsx" : "");
16324 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16325 : "__vector long long",
16326 intDI_type_node, 2);
16327 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16328 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16329 intSI_type_node, 4);
16330 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16331 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16332 intHI_type_node, 8);
16333 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16334 intQI_type_node, 16);
16336 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16337 unsigned_intQI_type_node, 16);
16338 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16339 unsigned_intHI_type_node, 8);
16340 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16341 unsigned_intSI_type_node, 4);
16342 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16343 ? "__vector unsigned long"
16344 : "__vector unsigned long long",
16345 unsigned_intDI_type_node, 2);
16347 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16349 const_str_type_node
16350 = build_pointer_type (build_qualified_type (char_type_node,
16353 /* We use V1TI mode as a special container to hold __int128_t items that
16354 must live in VSX registers. */
16355 if (intTI_type_node)
16357 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16358 intTI_type_node, 1);
16359 unsigned_V1TI_type_node
16360 = rs6000_vector_type ("__vector unsigned __int128",
16361 unsigned_intTI_type_node, 1);
16364 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16365 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16366 'vector unsigned short'. */
16368 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16369 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16370 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16371 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16372 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16374 long_integer_type_internal_node = long_integer_type_node;
16375 long_unsigned_type_internal_node = long_unsigned_type_node;
16376 long_long_integer_type_internal_node = long_long_integer_type_node;
16377 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16378 intQI_type_internal_node = intQI_type_node;
16379 uintQI_type_internal_node = unsigned_intQI_type_node;
16380 intHI_type_internal_node = intHI_type_node;
16381 uintHI_type_internal_node = unsigned_intHI_type_node;
16382 intSI_type_internal_node = intSI_type_node;
16383 uintSI_type_internal_node = unsigned_intSI_type_node;
16384 intDI_type_internal_node = intDI_type_node;
16385 uintDI_type_internal_node = unsigned_intDI_type_node;
16386 intTI_type_internal_node = intTI_type_node;
16387 uintTI_type_internal_node = unsigned_intTI_type_node;
16388 float_type_internal_node = float_type_node;
16389 double_type_internal_node = double_type_node;
16390 long_double_type_internal_node = long_double_type_node;
16391 dfloat64_type_internal_node = dfloat64_type_node;
16392 dfloat128_type_internal_node = dfloat128_type_node;
16393 void_type_internal_node = void_type_node;
16395 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16396 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16397 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16398 format that uses a pair of doubles, depending on the switches and
16401 If we don't support for either 128-bit IBM double double or IEEE 128-bit
16402 floating point, we need make sure the type is non-zero or else self-test
16403 fails during bootstrap.
16405 Always create __ibm128 as a separate type, even if the current long double
16406 format is IBM extended double.
16408 For IEEE 128-bit floating point, always create the type __ieee128. If the
16409 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16411 if (TARGET_FLOAT128_TYPE)
16413 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16414 ibm128_float_type_node = long_double_type_node;
16417 ibm128_float_type_node = make_node (REAL_TYPE);
16418 TYPE_PRECISION (ibm128_float_type_node) = 128;
16419 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16420 layout_type (ibm128_float_type_node);
16423 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16426 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16427 ieee128_float_type_node = long_double_type_node;
16429 ieee128_float_type_node = float128_type_node;
16431 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16436 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16438 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16440 builtin_mode_to_type[QImode][0] = integer_type_node;
16441 builtin_mode_to_type[HImode][0] = integer_type_node;
16442 builtin_mode_to_type[SImode][0] = intSI_type_node;
16443 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16444 builtin_mode_to_type[DImode][0] = intDI_type_node;
16445 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16446 builtin_mode_to_type[TImode][0] = intTI_type_node;
16447 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16448 builtin_mode_to_type[SFmode][0] = float_type_node;
16449 builtin_mode_to_type[DFmode][0] = double_type_node;
16450 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16451 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16452 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16453 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16454 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16455 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16456 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16457 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16458 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16459 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16460 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16461 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16462 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16463 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16464 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16465 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16466 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16468 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16469 TYPE_NAME (bool_char_type_node) = tdecl;
16471 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16472 TYPE_NAME (bool_short_type_node) = tdecl;
16474 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16475 TYPE_NAME (bool_int_type_node) = tdecl;
16477 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16478 TYPE_NAME (pixel_type_node) = tdecl;
16480 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16481 bool_char_type_node, 16);
16482 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16483 bool_short_type_node, 8);
16484 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16485 bool_int_type_node, 4);
16486 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16487 ? "__vector __bool long"
16488 : "__vector __bool long long",
16489 bool_long_long_type_node, 2);
16490 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16491 pixel_type_node, 8);
16493 /* Create Altivec and VSX builtins on machines with at least the
16494 general purpose extensions (970 and newer) to allow the use of
16495 the target attribute. */
16496 if (TARGET_EXTRA_BUILTINS)
16497 altivec_init_builtins ();
16499 htm_init_builtins ();
16501 if (TARGET_EXTRA_BUILTINS)
16502 rs6000_common_init_builtins ();
16504 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16505 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16506 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16508 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16509 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16510 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16512 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16513 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16514 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16516 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16517 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16518 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16520 mode = (TARGET_64BIT) ? DImode : SImode;
16521 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16522 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16523 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16525 ftype = build_function_type_list (unsigned_intDI_type_node,
16527 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16530 ftype = build_function_type_list (unsigned_intDI_type_node,
16533 ftype = build_function_type_list (unsigned_intSI_type_node,
16535 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16537 ftype = build_function_type_list (double_type_node, NULL_TREE);
16538 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16540 ftype = build_function_type_list (double_type_node, NULL_TREE);
16541 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16543 ftype = build_function_type_list (void_type_node,
16546 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16548 ftype = build_function_type_list (void_type_node,
16551 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16553 ftype = build_function_type_list (void_type_node,
16556 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16558 ftype = build_function_type_list (void_type_node,
16561 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16563 ftype = build_function_type_list (void_type_node,
16564 intSI_type_node, double_type_node,
16566 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16568 ftype = build_function_type_list (void_type_node, NULL_TREE);
16569 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16570 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16571 MISC_BUILTIN_SPEC_BARRIER);
16573 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16575 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16576 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16578 /* AIX libm provides clog as __clog. */
16579 if (TARGET_XCOFF &&
16580 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16581 set_user_assembler_name (tdecl, "__clog");
16583 #ifdef SUBTARGET_INIT_BUILTINS
16584 SUBTARGET_INIT_BUILTINS;
16588 /* Returns the rs6000 builtin decl for CODE. */
/* NOTE(review): this extracted view is missing interleaved physical lines
   (the function's return type, braces, etc.); only comments are added
   below — no code token is altered.  */
16591 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16593   HOST_WIDE_INT fnmask;
/* Reject out-of-range builtin codes up front.  */
16595   if (code >= RS6000_BUILTIN_COUNT)
16596     return error_mark_node;
/* Each builtin records a mask of the target features it needs; if the
   current target's rs6000_builtin_mask does not cover all of them,
   diagnose the unavailable builtin and fail.  */
16598   fnmask = rs6000_builtin_info[code].mask;
16599   if ((fnmask & rs6000_builtin_mask) != fnmask)
16601       rs6000_invalid_builtin ((enum rs6000_builtins)code);
16602       return error_mark_node;
/* Otherwise hand back the decl created at init time for this code.  */
16605   return rs6000_builtin_decls[code];
/* Create the function decls for all AltiVec and VSX built-in functions:
   the load/store intrinsics, the overloaded "__builtin_vec_*" entry
   points, the Cell variants, the dst/predicate/abs table-driven
   builtins, and the vec_init/vec_set/vec_ext access patterns.
   NOTE(review): this extracted view is missing interleaved physical
   lines (braces, switch/case heads, `continue;` statements, some string
   arguments); only comments are added below — no code token is
   altered.  */
16609 altivec_init_builtins (void)
16611   const struct builtin_description *d;
16615   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
/* Generic void* / const void* pointer types used by the load/store
   builtins below.  */
16617   tree pvoid_type_node = build_pointer_type (void_type_node);
16619   tree pcvoid_type_node
16620     = build_pointer_type (build_qualified_type (void_type_node,
/* Function-type nodes shared by many builtins; the "opaque" types are
   placeholders resolved later by overload resolution.  */
16623   tree int_ftype_opaque
16624     = build_function_type_list (integer_type_node,
16625 				opaque_V4SI_type_node, NULL_TREE);
16626   tree opaque_ftype_opaque
16627     = build_function_type_list (integer_type_node, NULL_TREE);
16628   tree opaque_ftype_opaque_int
16629     = build_function_type_list (opaque_V4SI_type_node,
16630 				opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16631   tree opaque_ftype_opaque_opaque_int
16632     = build_function_type_list (opaque_V4SI_type_node,
16633 				opaque_V4SI_type_node, opaque_V4SI_type_node,
16634 				integer_type_node, NULL_TREE);
16635   tree opaque_ftype_opaque_opaque_opaque
16636     = build_function_type_list (opaque_V4SI_type_node,
16637 				opaque_V4SI_type_node, opaque_V4SI_type_node,
16638 				opaque_V4SI_type_node, NULL_TREE);
16639   tree opaque_ftype_opaque_opaque
16640     = build_function_type_list (opaque_V4SI_type_node,
16641 				opaque_V4SI_type_node, opaque_V4SI_type_node,
16643   tree int_ftype_int_opaque_opaque
16644     = build_function_type_list (integer_type_node,
16645 				integer_type_node, opaque_V4SI_type_node,
16646 				opaque_V4SI_type_node, NULL_TREE);
16647   tree int_ftype_int_v4si_v4si
16648     = build_function_type_list (integer_type_node,
16649 				integer_type_node, V4SI_type_node,
16650 				V4SI_type_node, NULL_TREE);
16651   tree int_ftype_int_v2di_v2di
16652     = build_function_type_list (integer_type_node,
16653 				integer_type_node, V2DI_type_node,
16654 				V2DI_type_node, NULL_TREE);
16655   tree void_ftype_v4si
16656     = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16657   tree v8hi_ftype_void
16658     = build_function_type_list (V8HI_type_node, NULL_TREE);
16659   tree void_ftype_void
16660     = build_function_type_list (void_type_node, NULL_TREE);
16661   tree void_ftype_int
16662     = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
/* (long, const void *) load signatures — one per vector element type.  */
16664   tree opaque_ftype_long_pcvoid
16665     = build_function_type_list (opaque_V4SI_type_node,
16666 				long_integer_type_node, pcvoid_type_node,
16668   tree v16qi_ftype_long_pcvoid
16669     = build_function_type_list (V16QI_type_node,
16670 				long_integer_type_node, pcvoid_type_node,
16672   tree v8hi_ftype_long_pcvoid
16673     = build_function_type_list (V8HI_type_node,
16674 				long_integer_type_node, pcvoid_type_node,
16676   tree v4si_ftype_long_pcvoid
16677     = build_function_type_list (V4SI_type_node,
16678 				long_integer_type_node, pcvoid_type_node,
16680   tree v4sf_ftype_long_pcvoid
16681     = build_function_type_list (V4SF_type_node,
16682 				long_integer_type_node, pcvoid_type_node,
16684   tree v2df_ftype_long_pcvoid
16685     = build_function_type_list (V2DF_type_node,
16686 				long_integer_type_node, pcvoid_type_node,
16688   tree v2di_ftype_long_pcvoid
16689     = build_function_type_list (V2DI_type_node,
16690 				long_integer_type_node, pcvoid_type_node,
16692   tree v1ti_ftype_long_pcvoid
16693     = build_function_type_list (V1TI_type_node,
16694 				long_integer_type_node, pcvoid_type_node,
/* (vector, long, void *) store signatures — one per vector element type.  */
16697   tree void_ftype_opaque_long_pvoid
16698     = build_function_type_list (void_type_node,
16699 				opaque_V4SI_type_node, long_integer_type_node,
16700 				pvoid_type_node, NULL_TREE);
16701   tree void_ftype_v4si_long_pvoid
16702     = build_function_type_list (void_type_node,
16703 				V4SI_type_node, long_integer_type_node,
16704 				pvoid_type_node, NULL_TREE);
16705   tree void_ftype_v16qi_long_pvoid
16706     = build_function_type_list (void_type_node,
16707 				V16QI_type_node, long_integer_type_node,
16708 				pvoid_type_node, NULL_TREE);
16710   tree void_ftype_v16qi_pvoid_long
16711     = build_function_type_list (void_type_node,
16712 				V16QI_type_node, pvoid_type_node,
16713 				long_integer_type_node, NULL_TREE);
16715   tree void_ftype_v8hi_long_pvoid
16716     = build_function_type_list (void_type_node,
16717 				V8HI_type_node, long_integer_type_node,
16718 				pvoid_type_node, NULL_TREE);
16719   tree void_ftype_v4sf_long_pvoid
16720     = build_function_type_list (void_type_node,
16721 				V4SF_type_node, long_integer_type_node,
16722 				pvoid_type_node, NULL_TREE);
16723   tree void_ftype_v2df_long_pvoid
16724     = build_function_type_list (void_type_node,
16725 				V2DF_type_node, long_integer_type_node,
16726 				pvoid_type_node, NULL_TREE);
16727   tree void_ftype_v1ti_long_pvoid
16728     = build_function_type_list (void_type_node,
16729 				V1TI_type_node, long_integer_type_node,
16730 				pvoid_type_node, NULL_TREE);
16731   tree void_ftype_v2di_long_pvoid
16732     = build_function_type_list (void_type_node,
16733 				V2DI_type_node, long_integer_type_node,
16734 				pvoid_type_node, NULL_TREE);
/* (int, vector, vector) -> int predicate signatures, used by the
   AltiVec compare-predicate builtins further down.  */
16735   tree int_ftype_int_v8hi_v8hi
16736     = build_function_type_list (integer_type_node,
16737 				integer_type_node, V8HI_type_node,
16738 				V8HI_type_node, NULL_TREE);
16739   tree int_ftype_int_v16qi_v16qi
16740     = build_function_type_list (integer_type_node,
16741 				integer_type_node, V16QI_type_node,
16742 				V16QI_type_node, NULL_TREE);
16743   tree int_ftype_int_v4sf_v4sf
16744     = build_function_type_list (integer_type_node,
16745 				integer_type_node, V4SF_type_node,
16746 				V4SF_type_node, NULL_TREE);
16747   tree int_ftype_int_v2df_v2df
16748     = build_function_type_list (integer_type_node,
16749 				integer_type_node, V2DF_type_node,
16750 				V2DF_type_node, NULL_TREE);
/* Unary vector -> vector signatures, used by the abs* builtins.  */
16751   tree v2di_ftype_v2di
16752     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16753   tree v4si_ftype_v4si
16754     = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16755   tree v8hi_ftype_v8hi
16756     = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16757   tree v16qi_ftype_v16qi
16758     = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16759   tree v4sf_ftype_v4sf
16760     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16761   tree v2df_ftype_v2df
16762     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16763   tree void_ftype_pcvoid_int_int
16764     = build_function_type_list (void_type_node,
16765 				pcvoid_type_node, integer_type_node,
16766 				integer_type_node, NULL_TREE);
/* VSCR access, data-stream control, and the lvsl/lvsr/lve*x/lvx(l)
   load builtins.  */
16768   def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16769   def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16770   def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16771   def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16772   def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16773   def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16774   def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16775   def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16776   def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16777   def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16778   def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16779 	       ALTIVEC_BUILTIN_LVXL_V2DF);
16780   def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16781 	       ALTIVEC_BUILTIN_LVXL_V2DI);
16782   def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16783 	       ALTIVEC_BUILTIN_LVXL_V4SF);
16784   def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16785 	       ALTIVEC_BUILTIN_LVXL_V4SI);
16786   def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16787 	       ALTIVEC_BUILTIN_LVXL_V8HI);
16788   def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16789 	       ALTIVEC_BUILTIN_LVXL_V16QI);
16790   def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16791   def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16792 	       ALTIVEC_BUILTIN_LVX_V1TI);
16793   def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16794 	       ALTIVEC_BUILTIN_LVX_V2DF);
16795   def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16796 	       ALTIVEC_BUILTIN_LVX_V2DI);
16797   def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16798 	       ALTIVEC_BUILTIN_LVX_V4SF);
16799   def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16800 	       ALTIVEC_BUILTIN_LVX_V4SI);
16801   def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16802 	       ALTIVEC_BUILTIN_LVX_V8HI);
16803   def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16804 	       ALTIVEC_BUILTIN_LVX_V16QI);
/* Matching stvx/stve*x/stvxl store builtins.  */
16805   def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16806   def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16807 	       ALTIVEC_BUILTIN_STVX_V2DF);
16808   def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16809 	       ALTIVEC_BUILTIN_STVX_V2DI);
16810   def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16811 	       ALTIVEC_BUILTIN_STVX_V4SF);
16812   def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16813 	       ALTIVEC_BUILTIN_STVX_V4SI);
16814   def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16815 	       ALTIVEC_BUILTIN_STVX_V8HI);
16816   def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16817 	       ALTIVEC_BUILTIN_STVX_V16QI);
16818   def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16819   def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16820   def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16821 	       ALTIVEC_BUILTIN_STVXL_V2DF);
16822   def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16823 	       ALTIVEC_BUILTIN_STVXL_V2DI);
16824   def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16825 	       ALTIVEC_BUILTIN_STVXL_V4SF);
16826   def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16827 	       ALTIVEC_BUILTIN_STVXL_V4SI);
16828   def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16829 	       ALTIVEC_BUILTIN_STVXL_V8HI);
16830   def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16831 	       ALTIVEC_BUILTIN_STVXL_V16QI);
16832   def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16833   def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
/* Overloaded "__builtin_vec_*" entry points, resolved later against the
   concrete variants above.  */
16834   def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16835   def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16836   def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16837   def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16838   def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16839   def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16840   def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16841   def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16842   def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16843   def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16844   def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16845   def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16846   def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16847   def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
/* VSX lxvd2x/lxvw4x/stxvd2x/stxvw4x and the element-reversed
   (little-endian order) load/store variants.  */
16849   def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16850 	       VSX_BUILTIN_LXVD2X_V2DF);
16851   def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16852 	       VSX_BUILTIN_LXVD2X_V2DI);
16853   def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16854 	       VSX_BUILTIN_LXVW4X_V4SF);
16855   def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16856 	       VSX_BUILTIN_LXVW4X_V4SI);
16857   def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16858 	       VSX_BUILTIN_LXVW4X_V8HI);
16859   def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16860 	       VSX_BUILTIN_LXVW4X_V16QI);
16861   def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16862 	       VSX_BUILTIN_STXVD2X_V2DF);
16863   def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16864 	       VSX_BUILTIN_STXVD2X_V2DI);
16865   def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16866 	       VSX_BUILTIN_STXVW4X_V4SF);
16867   def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16868 	       VSX_BUILTIN_STXVW4X_V4SI);
16869   def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16870 	       VSX_BUILTIN_STXVW4X_V8HI);
16871   def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16872 	       VSX_BUILTIN_STXVW4X_V16QI);
16874   def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16875 	       VSX_BUILTIN_LD_ELEMREV_V2DF);
16876   def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16877 	       VSX_BUILTIN_LD_ELEMREV_V2DI);
16878   def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16879 	       VSX_BUILTIN_LD_ELEMREV_V4SF);
16880   def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16881 	       VSX_BUILTIN_LD_ELEMREV_V4SI);
16882   def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16883 	       VSX_BUILTIN_LD_ELEMREV_V8HI);
16884   def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16885 	       VSX_BUILTIN_LD_ELEMREV_V16QI);
16886   def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16887 	       VSX_BUILTIN_ST_ELEMREV_V2DF);
16888   def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16889 	       VSX_BUILTIN_ST_ELEMREV_V1TI);
16890   def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16891 	       VSX_BUILTIN_ST_ELEMREV_V2DI);
16892   def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16893 	       VSX_BUILTIN_ST_ELEMREV_V4SF);
16894   def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16895 	       VSX_BUILTIN_ST_ELEMREV_V4SI);
16896   def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16897 	       VSX_BUILTIN_ST_ELEMREV_V8HI);
16898   def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16899 	       VSX_BUILTIN_ST_ELEMREV_V16QI);
/* Overloaded VSX load/store entry points (vec_vsx_ld/st, vec_xl/xst
   and their explicit big-endian forms).  */
16901   def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16902 	       VSX_BUILTIN_VEC_LD);
16903   def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16904 	       VSX_BUILTIN_VEC_ST);
16905   def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16906 	       VSX_BUILTIN_VEC_XL);
16907   def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16908 	       VSX_BUILTIN_VEC_XL_BE);
16909   def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16910 	       VSX_BUILTIN_VEC_XST);
16911   def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16912 	       VSX_BUILTIN_VEC_XST_BE);
/* Generic overloaded helpers: vec_step/splats/promote and the
   splat/extract/insert/convert families taking an int literal.  */
16914   def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16915   def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16916   def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16918   def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16919   def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16920   def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16921   def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16922   def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16923   def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16924   def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16925   def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16926   def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16927   def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16928   def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16929   def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
/* Extended-arithmetic and miscellaneous overloaded operations.  */
16931   def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16932 	       ALTIVEC_BUILTIN_VEC_ADDE);
16933   def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16934 	       ALTIVEC_BUILTIN_VEC_ADDEC);
16935   def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16936 	       ALTIVEC_BUILTIN_VEC_CMPNE);
16937   def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16938 	       ALTIVEC_BUILTIN_VEC_MUL);
16939   def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16940 	       ALTIVEC_BUILTIN_VEC_SUBE);
16941   def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16942 	       ALTIVEC_BUILTIN_VEC_SUBEC);
16944   /* Cell builtins. */
16945   def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16946   def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16947   def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16948   def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16950   def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16951   def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16952   def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16953   def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16955   def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16956   def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16957   def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16958   def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16960   def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16961   def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16962   def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16963   def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
/* ISA 3.0 (Power9) variable-length vector store builtins.  */
16965   if (TARGET_P9_VECTOR)
16967       def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16968 		   P9V_BUILTIN_STXVL);
16969       def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16970 		   P9V_BUILTIN_XST_LEN_R);
16973   /* Add the DST variants. */
16975   for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16977       HOST_WIDE_INT mask = d->mask;
16979       /* It is expected that these dst built-in functions may have
16980 	 d->icode equal to CODE_FOR_nothing. */
/* Skip (with optional debug trace) any dst builtin whose required
   target features are not all enabled.  */
16981       if ((mask & builtin_mask) != mask)
16983 	  if (TARGET_DEBUG_BUILTIN)
16984 	    fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16988       def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16991   /* Initialize the predicates. */
16992   d = bdesc_altivec_preds;
16993   for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16995       machine_mode mode1;
16997       HOST_WIDE_INT mask = d->mask;
16999       if ((mask & builtin_mask) != mask)
17001 	  if (TARGET_DEBUG_BUILTIN)
17002 	    fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
/* Overloaded predicates take the opaque placeholder type; otherwise
   select the predicate signature from the insn's operand-1 mode.  */
17007       if (rs6000_overloaded_builtin_p (d->code))
17011 	  /* Cannot define builtin if the instruction is disabled. */
17012 	  gcc_assert (d->icode != CODE_FOR_nothing);
17013 	  mode1 = insn_data[d->icode].operand[1].mode;
17019 	  type = int_ftype_int_opaque_opaque;
17022 	  type = int_ftype_int_v2di_v2di;
17025 	  type = int_ftype_int_v4si_v4si;
17028 	  type = int_ftype_int_v8hi_v8hi;
17031 	  type = int_ftype_int_v16qi_v16qi;
17034 	  type = int_ftype_int_v4sf_v4sf;
17037 	  type = int_ftype_int_v2df_v2df;
17040 	  gcc_unreachable ();
17043       def_builtin (d->name, type, d->code);
17046   /* Initialize the abs* operators. */
17048   for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17050       machine_mode mode0;
17052       HOST_WIDE_INT mask = d->mask;
17054       if ((mask & builtin_mask) != mask)
17056 	  if (TARGET_DEBUG_BUILTIN)
17057 	    fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17062       /* Cannot define builtin if the instruction is disabled. */
17063       gcc_assert (d->icode != CODE_FOR_nothing);
/* The abs builtins are unary; pick the signature from the insn's
   result (operand-0) mode.  */
17064       mode0 = insn_data[d->icode].operand[0].mode;
17069 	  type = v2di_ftype_v2di;
17072 	  type = v4si_ftype_v4si;
17075 	  type = v8hi_ftype_v8hi;
17078 	  type = v16qi_ftype_v16qi;
17081 	  type = v4sf_ftype_v4sf;
17084 	  type = v2df_ftype_v2df;
17087 	  gcc_unreachable ();
17090       def_builtin (d->name, type, d->code);
17093   /* Initialize target builtin that implements
17094      targetm.vectorize.builtin_mask_for_load. */
17096   decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17097 			       v16qi_ftype_long_pcvoid,
17098 			       ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17099 			       BUILT_IN_MD, NULL, NULL_TREE);
17100   TREE_READONLY (decl) = 1;
17101   /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17102   altivec_builtin_mask_for_load = decl;
17104   /* Access to the vec_init patterns. */
17105   ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17106 				    integer_type_node, integer_type_node,
17107 				    integer_type_node, NULL_TREE);
17108   def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17110   ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17111 				    short_integer_type_node,
17112 				    short_integer_type_node,
17113 				    short_integer_type_node,
17114 				    short_integer_type_node,
17115 				    short_integer_type_node,
17116 				    short_integer_type_node,
17117 				    short_integer_type_node, NULL_TREE);
17118   def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17120   ftype = build_function_type_list (V16QI_type_node, char_type_node,
17121 				    char_type_node, char_type_node,
17122 				    char_type_node, char_type_node,
17123 				    char_type_node, char_type_node,
17124 				    char_type_node, char_type_node,
17125 				    char_type_node, char_type_node,
17126 				    char_type_node, char_type_node,
17127 				    char_type_node, char_type_node,
17128 				    char_type_node, NULL_TREE);
17129   def_builtin ("__builtin_vec_init_v16qi", ftype,
17130 	       ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17132   ftype = build_function_type_list (V4SF_type_node, float_type_node,
17133 				    float_type_node, float_type_node,
17134 				    float_type_node, NULL_TREE);
17135   def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17137   /* VSX builtins. */
17138   ftype = build_function_type_list (V2DF_type_node, double_type_node,
17139 				    double_type_node, NULL_TREE);
17140   def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17142   ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17143 				    intDI_type_node, NULL_TREE);
17144   def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17146   /* Access to the vec_set patterns. */
17147   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17149 				    integer_type_node, NULL_TREE);
17150   def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17152   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17154 				    integer_type_node, NULL_TREE);
17155   def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17157   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17159 				    integer_type_node, NULL_TREE);
17160   def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17162   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17164 				    integer_type_node, NULL_TREE);
17165   def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17167   ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17169 				    integer_type_node, NULL_TREE);
17170   def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17172   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17174 				    integer_type_node, NULL_TREE);
17175   def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17177   /* Access to the vec_extract patterns. */
17178   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17179 				    integer_type_node, NULL_TREE);
17180   def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17182   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17183 				    integer_type_node, NULL_TREE);
17184   def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17186   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17187 				    integer_type_node, NULL_TREE);
17188   def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17190   ftype = build_function_type_list (float_type_node, V4SF_type_node,
17191 				    integer_type_node, NULL_TREE);
17192   def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17194   ftype = build_function_type_list (double_type_node, V2DF_type_node,
17195 				    integer_type_node, NULL_TREE);
17196   def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17198   ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17199 				    integer_type_node, NULL_TREE);
17200   def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
/* V1TImode support is conditional (the type node only exists on targets
   that provide 128-bit integer vectors), so its builtins are guarded.  */
17203   if (V1TI_type_node)
17205       tree v1ti_ftype_long_pcvoid
17206 	= build_function_type_list (V1TI_type_node,
17207 				    long_integer_type_node, pcvoid_type_node,
17209       tree void_ftype_v1ti_long_pvoid
17210 	= build_function_type_list (void_type_node,
17211 				    V1TI_type_node, long_integer_type_node,
17212 				    pvoid_type_node, NULL_TREE);
17213       def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17214 		   VSX_BUILTIN_LD_ELEMREV_V1TI);
17215       def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17216 		   VSX_BUILTIN_LXVD2X_V1TI);
17217       def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17218 		   VSX_BUILTIN_STXVD2X_V1TI);
17219       ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17220 					NULL_TREE, NULL_TREE);
17221       def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17222       ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17224 					integer_type_node, NULL_TREE);
17225       def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17226       ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17227 					integer_type_node, NULL_TREE);
17228       def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
/* Create the function decls for the Hardware Transactional Memory (HTM)
   builtins described in bdesc_htm, deriving each signature from the
   builtin's attribute bits (SPR access, void result, arity).
   NOTE(review): this extracted view is missing interleaved physical
   lines (the `switch (nopnds)` head, braces, `continue;` statements);
   only comments are added below — no code token is altered.  */
17234 htm_init_builtins (void)
17236   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17237   const struct builtin_description *d;
17241   for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17243       tree op[MAX_HTM_OPERANDS], type;
17244       HOST_WIDE_INT mask = d->mask;
17245       unsigned attr = rs6000_builtin_info[d->code].attr;
17246       bool void_func = (attr & RS6000_BTC_VOID);
17247       int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17249       tree gpr_type_node;
17253       /* It is expected that these htm built-in functions may have
17254 	 d->icode equal to CODE_FOR_nothing. */
/* Choose the GPR-width unsigned type: 64-bit when -m32 code still runs
   on a 64-bit core (registers are 64 bits wide), else the plain
   long width.  */
17256       if (TARGET_32BIT && TARGET_POWERPC64)
17257 	gpr_type_node = long_long_unsigned_type_node;
17259 	gpr_type_node = long_unsigned_type_node;
/* SPR accessors read/write a full register; tabortdc/tabortdci take a
   register-sized argument but return a condition value; everything
   else is plain unsigned int in and out.  */
17261       if (attr & RS6000_BTC_SPR)
17263 	  rettype = gpr_type_node;
17264 	  argtype = gpr_type_node;
17266       else if (d->code == HTM_BUILTIN_TABORTDC
17267 	       || d->code == HTM_BUILTIN_TABORTDCI)
17269 	  rettype = unsigned_type_node;
17270 	  argtype = gpr_type_node;
17274 	  rettype = unsigned_type_node;
17275 	  argtype = unsigned_type_node;
/* Skip builtins whose required target features are not all enabled,
   and entries with no name, tracing either case under
   -mdebug=builtin.  */
17278       if ((mask & builtin_mask) != mask)
17280 	  if (TARGET_DEBUG_BUILTIN)
17281 	    fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17287 	  if (TARGET_DEBUG_BUILTIN)
17288 	    fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17289 		     (long unsigned) i);
/* op[0] is the return type (void for RS6000_BTC_VOID builtins);
   op[1..] are the argument types, one per arity level.  */
17293       op[nopnds++] = (void_func) ? void_type_node : rettype;
17295       if (attr_args == RS6000_BTC_UNARY)
17296 	op[nopnds++] = argtype;
17297       else if (attr_args == RS6000_BTC_BINARY)
17299 	  op[nopnds++] = argtype;
17300 	  op[nopnds++] = argtype;
17302       else if (attr_args == RS6000_BTC_TERNARY)
17304 	  op[nopnds++] = argtype;
17305 	  op[nopnds++] = argtype;
17306 	  op[nopnds++] = argtype;
/* Build the FUNCTION_TYPE matching the collected operand count.  */
17312 	    type = build_function_type_list (op[0], NULL_TREE);
17315 	    type = build_function_type_list (op[0], op[1], NULL_TREE);
17318 	    type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17321 	    type = build_function_type_list (op[0], op[1], op[2], op[3],
17325 	    gcc_unreachable ();
17328       def_builtin (d->name, type, d->code);
17332 /* Hash function for builtin functions with up to 3 arguments and a return
/* Folds all four machine modes (return value plus three arguments) and
   their signedness flags into one hash value; must stay consistent with
   builtin_hasher::equal below.  */
17335 builtin_hasher::hash (builtin_hash_struct *bh)
17340   for (i = 0; i < 4; i++)
/* Mix in the mode (radix MAX_MACHINE_MODE) and then the unsigned-p bit.  */
17342       ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17343       ret = (ret * 2) + bh->uns_p[i];
17349 /* Compare builtin hash entries H1 and H2 for equivalence. */
/* Entries are equal only when every one of the four modes and every one of
   the four signedness flags match -- the same fields hashed above.  */
17351 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17353   return ((p1->mode[0] == p2->mode[0])
17354 	  && (p1->mode[1] == p2->mode[1])
17355 	  && (p1->mode[2] == p2->mode[2])
17356 	  && (p1->mode[3] == p2->mode[3])
17357 	  && (p1->uns_p[0] == p2->uns_p[0])
17358 	  && (p1->uns_p[1] == p2->uns_p[1])
17359 	  && (p1->uns_p[2] == p2->uns_p[2])
17360 	  && (p1->uns_p[3] == p2->uns_p[3]));
17363 /* Map types for builtin functions with an explicit return type and up to 3
17364    arguments.  Functions with fewer than 3 arguments use VOIDmode as the type
17365    of the argument. */
/* Returns a (memoized) FUNCTION_TYPE tree for the builtin BUILTIN named
   NAME, whose return/argument machine modes are MODE_RET and MODE_ARG0..2.
   Issues fatal_error when a mode has no registered tree type.
   NOTE(review): the bodies of the switch cases below (the h.uns_p[...]
   assignments) and the enclosing switch statement itself are elided in
   this excerpt; confirm against the full source.  */
17367 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17368 		       machine_mode mode_arg1, machine_mode mode_arg2,
17369 		       enum rs6000_builtins builtin, const char *name)
17371   struct builtin_hash_struct h;
17372   struct builtin_hash_struct *h2;
17375   tree ret_type = NULL_TREE;
17376   tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17378   /* Create builtin_hash_table. */
17379   if (builtin_hash_table == NULL)
17380     builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
/* Slot 0 is the return mode; slots 1..3 the argument modes, matching the
   layout hashed by builtin_hasher.  */
17382   h.type = NULL_TREE;
17383   h.mode[0] = mode_ret;
17384   h.mode[1] = mode_arg0;
17385   h.mode[2] = mode_arg1;
17386   h.mode[3] = mode_arg2;
17392   /* If the builtin is a type that produces unsigned results or takes unsigned
17393      arguments, and it is returned as a decl for the vectorizer (such as
17394      widening multiplies, permute), make sure the arguments and return value
17395      are type correct.  */
17398       /* unsigned 1 argument functions.  */
17399     case CRYPTO_BUILTIN_VSBOX:
17400     case CRYPTO_BUILTIN_VSBOX_BE:
17401     case P8V_BUILTIN_VGBBD:
17402     case MISC_BUILTIN_CDTBCD:
17403     case MISC_BUILTIN_CBCDTD:
17408       /* unsigned 2 argument functions.  */
17409     case ALTIVEC_BUILTIN_VMULEUB:
17410     case ALTIVEC_BUILTIN_VMULEUH:
17411     case P8V_BUILTIN_VMULEUW:
17412     case ALTIVEC_BUILTIN_VMULOUB:
17413     case ALTIVEC_BUILTIN_VMULOUH:
17414     case P8V_BUILTIN_VMULOUW:
17415     case CRYPTO_BUILTIN_VCIPHER:
17416     case CRYPTO_BUILTIN_VCIPHER_BE:
17417     case CRYPTO_BUILTIN_VCIPHERLAST:
17418     case CRYPTO_BUILTIN_VCIPHERLAST_BE:
17419     case CRYPTO_BUILTIN_VNCIPHER:
17420     case CRYPTO_BUILTIN_VNCIPHER_BE:
17421     case CRYPTO_BUILTIN_VNCIPHERLAST:
17422     case CRYPTO_BUILTIN_VNCIPHERLAST_BE:
17423     case CRYPTO_BUILTIN_VPMSUMB:
17424     case CRYPTO_BUILTIN_VPMSUMH:
17425     case CRYPTO_BUILTIN_VPMSUMW:
17426     case CRYPTO_BUILTIN_VPMSUMD:
17427     case CRYPTO_BUILTIN_VPMSUM:
17428     case MISC_BUILTIN_ADDG6S:
17429     case MISC_BUILTIN_DIVWEU:
17430     case MISC_BUILTIN_DIVDEU:
17431     case VSX_BUILTIN_UDIV_V2DI:
17432     case ALTIVEC_BUILTIN_VMAXUB:
17433     case ALTIVEC_BUILTIN_VMINUB:
17434     case ALTIVEC_BUILTIN_VMAXUH:
17435     case ALTIVEC_BUILTIN_VMINUH:
17436     case ALTIVEC_BUILTIN_VMAXUW:
17437     case ALTIVEC_BUILTIN_VMINUW:
17438     case P8V_BUILTIN_VMAXUD:
17439     case P8V_BUILTIN_VMINUD:
17445       /* unsigned 3 argument functions.  */
17446     case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17447     case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17448     case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17449     case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17450     case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17451     case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17452     case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17453     case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17454     case VSX_BUILTIN_VPERM_16QI_UNS:
17455     case VSX_BUILTIN_VPERM_8HI_UNS:
17456     case VSX_BUILTIN_VPERM_4SI_UNS:
17457     case VSX_BUILTIN_VPERM_2DI_UNS:
17458     case VSX_BUILTIN_XXSEL_16QI_UNS:
17459     case VSX_BUILTIN_XXSEL_8HI_UNS:
17460     case VSX_BUILTIN_XXSEL_4SI_UNS:
17461     case VSX_BUILTIN_XXSEL_2DI_UNS:
17462     case CRYPTO_BUILTIN_VPERMXOR:
17463     case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17464     case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17465     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17466     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17467     case CRYPTO_BUILTIN_VSHASIGMAW:
17468     case CRYPTO_BUILTIN_VSHASIGMAD:
17469     case CRYPTO_BUILTIN_VSHASIGMA:
17476       /* signed permute functions with unsigned char mask.  */
17477     case ALTIVEC_BUILTIN_VPERM_16QI:
17478     case ALTIVEC_BUILTIN_VPERM_8HI:
17479     case ALTIVEC_BUILTIN_VPERM_4SI:
17480     case ALTIVEC_BUILTIN_VPERM_4SF:
17481     case ALTIVEC_BUILTIN_VPERM_2DI:
17482     case ALTIVEC_BUILTIN_VPERM_2DF:
17483     case VSX_BUILTIN_VPERM_16QI:
17484     case VSX_BUILTIN_VPERM_8HI:
17485     case VSX_BUILTIN_VPERM_4SI:
17486     case VSX_BUILTIN_VPERM_4SF:
17487     case VSX_BUILTIN_VPERM_2DI:
17488     case VSX_BUILTIN_VPERM_2DF:
17492       /* unsigned args, signed return.  */
17493     case VSX_BUILTIN_XVCVUXDSP:
17494     case VSX_BUILTIN_XVCVUXDDP_UNS:
17495     case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17499       /* signed args, unsigned return.  */
17500     case VSX_BUILTIN_XVCVDPUXDS_UNS:
17501     case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17502     case MISC_BUILTIN_UNPACK_TD:
17503     case MISC_BUILTIN_UNPACK_V1TI:
17507       /* unsigned arguments, bool return (compares).  */
17508     case ALTIVEC_BUILTIN_VCMPEQUB:
17509     case ALTIVEC_BUILTIN_VCMPEQUH:
17510     case ALTIVEC_BUILTIN_VCMPEQUW:
17511     case P8V_BUILTIN_VCMPEQUD:
17512     case VSX_BUILTIN_CMPGE_U16QI:
17513     case VSX_BUILTIN_CMPGE_U8HI:
17514     case VSX_BUILTIN_CMPGE_U4SI:
17515     case VSX_BUILTIN_CMPGE_U2DI:
17516     case ALTIVEC_BUILTIN_VCMPGTUB:
17517     case ALTIVEC_BUILTIN_VCMPGTUH:
17518     case ALTIVEC_BUILTIN_VCMPGTUW:
17519     case P8V_BUILTIN_VCMPGTUD:
17524       /* unsigned arguments for 128-bit pack instructions.  */
17525     case MISC_BUILTIN_PACK_TD:
17526     case MISC_BUILTIN_PACK_V1TI:
17531       /* unsigned second arguments (vector shift right).  */
17532     case ALTIVEC_BUILTIN_VSRB:
17533     case ALTIVEC_BUILTIN_VSRH:
17534     case ALTIVEC_BUILTIN_VSRW:
17535     case P8V_BUILTIN_VSRD:
17543   /* Figure out how many args are present.  */
17544   while (num_args > 0 && h.mode[num_args] == VOIDmode)
/* Resolve the return type; an unsigned mode with no unsigned tree type
   registered falls back to the signed variant before erroring out.  */
17547   ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17548   if (!ret_type && h.uns_p[0])
17549     ret_type = builtin_mode_to_type[h.mode[0]][0];
17552     fatal_error (input_location,
17553 		 "internal error: builtin function %qs had an unexpected "
17554 		 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17556   for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17557     arg_type[i] = NULL_TREE;
17559   for (i = 0; i < num_args; i++)
17561       int m = (int) h.mode[i+1];
17562       int uns_p = h.uns_p[i+1];
/* Same fallback as for the return type: try unsigned, then signed.  */
17564       arg_type[i] = builtin_mode_to_type[m][uns_p];
17565       if (!arg_type[i] && uns_p)
17566 	arg_type[i] = builtin_mode_to_type[m][0];
17569 	fatal_error (input_location,
17570 		     "internal error: builtin function %qs, argument %d "
17571 		     "had unexpected argument type %qs", name, i,
17572 		     GET_MODE_NAME (m));
/* Memoize: share a single FUNCTION_TYPE node per distinct mode/signedness
   signature across all builtins.  */
17575   builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17576   if (*found == NULL)
17578       h2 = ggc_alloc<builtin_hash_struct> ();
17582       h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17583 					   arg_type[2], NULL_TREE);
17586   return (*found)->type;
/* Register the common AltiVec/VSX builtins shared across sub-targets.
   Iterates the bdesc_3arg, bdesc_2arg, bdesc_1arg and bdesc_0arg tables;
   overloaded builtins get an opaque-vector type, the rest derive their
   type from the insn pattern's operand modes via builtin_function_type.
   NOTE(review): some braces/continue statements are elided in this
   excerpt.  */
17590 rs6000_common_init_builtins (void)
17592   const struct builtin_description *d;
/* Lazily-built shared types for overloaded (opaque) builtins.  */
17595   tree opaque_ftype_opaque = NULL_TREE;
17596   tree opaque_ftype_opaque_opaque = NULL_TREE;
17597   tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17598   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17600   /* Create Altivec and VSX builtins on machines with at least the
17601      general purpose extensions (970 and newer) to allow the use of
17602      the target attribute.  */
17604   if (TARGET_EXTRA_BUILTINS)
17605     builtin_mask |= RS6000_BTM_COMMON;
17607   /* Add the ternary operators.  */
17609   for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17612       HOST_WIDE_INT mask = d->mask;
17614       if ((mask & builtin_mask) != mask)
17616 	  if (TARGET_DEBUG_BUILTIN)
17617 	    fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17621       if (rs6000_overloaded_builtin_p (d->code))
17623 	  if (! (type = opaque_ftype_opaque_opaque_opaque))
17624 	    type = opaque_ftype_opaque_opaque_opaque
17625 	      = build_function_type_list (opaque_V4SI_type_node,
17626 					  opaque_V4SI_type_node,
17627 					  opaque_V4SI_type_node,
17628 					  opaque_V4SI_type_node,
17633 	  enum insn_code icode = d->icode;
17636 	      if (TARGET_DEBUG_BUILTIN)
17637 		fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17643 	  if (icode == CODE_FOR_nothing)
17645 	      if (TARGET_DEBUG_BUILTIN)
17646 		fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
/* Operand 0 is the output; operands 1-3 the inputs of the insn pattern.  */
17652 	  type = builtin_function_type (insn_data[icode].operand[0].mode,
17653 					insn_data[icode].operand[1].mode,
17654 					insn_data[icode].operand[2].mode,
17655 					insn_data[icode].operand[3].mode,
17659       def_builtin (d->name, type, d->code);
17662   /* Add the binary operators.  */
17664   for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17666       machine_mode mode0, mode1, mode2;
17668       HOST_WIDE_INT mask = d->mask;
17670       if ((mask & builtin_mask) != mask)
17672 	  if (TARGET_DEBUG_BUILTIN)
17673 	    fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17677       if (rs6000_overloaded_builtin_p (d->code))
17679 	  if (! (type = opaque_ftype_opaque_opaque))
17680 	    type = opaque_ftype_opaque_opaque
17681 	      = build_function_type_list (opaque_V4SI_type_node,
17682 					  opaque_V4SI_type_node,
17683 					  opaque_V4SI_type_node,
17688 	  enum insn_code icode = d->icode;
17691 	      if (TARGET_DEBUG_BUILTIN)
17692 		fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17698 	  if (icode == CODE_FOR_nothing)
17700 	      if (TARGET_DEBUG_BUILTIN)
17701 		fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17707 	  mode0 = insn_data[icode].operand[0].mode;
17708 	  mode1 = insn_data[icode].operand[1].mode;
17709 	  mode2 = insn_data[icode].operand[2].mode;
17711 	  type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17715       def_builtin (d->name, type, d->code);
17718   /* Add the simple unary operators.  */
17720   for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17722       machine_mode mode0, mode1;
17724       HOST_WIDE_INT mask = d->mask;
17726       if ((mask & builtin_mask) != mask)
17728 	  if (TARGET_DEBUG_BUILTIN)
17729 	    fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17733       if (rs6000_overloaded_builtin_p (d->code))
17735 	  if (! (type = opaque_ftype_opaque))
17736 	    type = opaque_ftype_opaque
17737 	      = build_function_type_list (opaque_V4SI_type_node,
17738 					  opaque_V4SI_type_node,
17743 	  enum insn_code icode = d->icode;
17746 	      if (TARGET_DEBUG_BUILTIN)
17747 		fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17753 	  if (icode == CODE_FOR_nothing)
17755 	      if (TARGET_DEBUG_BUILTIN)
17756 		fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17762 	  mode0 = insn_data[icode].operand[0].mode;
17763 	  mode1 = insn_data[icode].operand[1].mode;
17765 	  type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17769       def_builtin (d->name, type, d->code);
17772   /* Add the simple no-argument operators.  */
17774   for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17776       machine_mode mode0;
17778       HOST_WIDE_INT mask = d->mask;
17780       if ((mask & builtin_mask) != mask)
17782 	  if (TARGET_DEBUG_BUILTIN)
17783 	    fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17786       if (rs6000_overloaded_builtin_p (d->code))
17788 	  if (!opaque_ftype_opaque)
17789 	    opaque_ftype_opaque
17790 	      = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17791 	  type = opaque_ftype_opaque;
17795 	  enum insn_code icode = d->icode;
17798 	  if (TARGET_DEBUG_BUILTIN)
17799 	    fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17800 		     (long unsigned) i);
17803 	  if (icode == CODE_FOR_nothing)
17805 	      if (TARGET_DEBUG_BUILTIN)
17807 			 "rs6000_builtin, skip no-argument %s (no code)\n",
17811 	  mode0 = insn_data[icode].operand[0].mode;
17812 	  type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17815       def_builtin (d->name, type, d->code);
17819 /* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */
/* Register library-function names for IBM extended-double (double-double)
   arithmetic in MODE.  Without -mxl-compat the GCC __gcc_q* soft-float
   names are used; with it, the historical XL compiler _xlq* names.  Also
   installs conversion routines, plus DFP and 128-bit integer conversions
   for IFmode / 64-bit targets.  NOTE(review): the else-branch braces and
   some closing lines are elided in this excerpt.  */
17821 init_float128_ibm (machine_mode mode)
17823   if (!TARGET_XL_COMPAT)
17825       set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17826       set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17827       set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17828       set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
/* The remaining soft-float helpers are only needed without hardware
   floating point.  */
17830       if (!TARGET_HARD_FLOAT)
17832 	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17833 	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17834 	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17835 	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17836 	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17837 	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17838 	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
17839 	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17841 	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17842 	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17843 	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17844 	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17845 	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17846 	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17847 	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17848 	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
/* -mxl-compat: use the IBM XL compiler's runtime names instead.  */
17853       set_optab_libfunc (add_optab, mode, "_xlqadd");
17854       set_optab_libfunc (sub_optab, mode, "_xlqsub");
17855       set_optab_libfunc (smul_optab, mode, "_xlqmul");
17856       set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17859   /* Add various conversions for IFmode to use the traditional TFmode
17861   if (mode == IFmode)
/* Decimal-float <-> IBM long double conversions keep the historical
   "tf" names since IFmode is the traditional TFmode format.  */
17863       set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17864       set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17865       set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17866       set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17867       set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17868       set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
17870       if (TARGET_POWERPC64)
17872 	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17873 	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17874 	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17875 	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17880 /* Create a decl for either complex long double multiply or complex long double
17881    divide when long double is IEEE 128-bit floating point.  We can't use
17882    __multc3 and __divtc3 because the original long double using IBM extended
17883    double used those names.  The complex multiply/divide functions are encoded
17884    as builtin functions with a complex result and 4 scalar inputs.  */
/* NAME is the library symbol (e.g. "__mulkc3"), FNCODE the built_in_function
   slot to fill, FNTYPE the complex-result/4-scalar-arg function type.  */
17887 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17889   tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
/* Make the decl the implicit builtin for FNCODE so the expanders find it.  */
17892   set_builtin_decl (fncode, fndecl, true);
17894   if (TARGET_DEBUG_BUILTIN)
17895     fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17900 /* Set up IEEE 128-bit floating point routines.  Use different names if the
17901    arguments can be passed in a vector register.  The historical PowerPC
17902    implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17903    continue to use that if we aren't using vector registers to pass IEEE
17904    128-bit floating point.  */
/* NOTE(review): the else-branch brace lines are elided in this excerpt.  */
17907 init_float128_ieee (machine_mode mode)
17909   if (FLOAT128_VECTOR_P (mode))
/* Guard so the __mulkc3/__divkc3 decls are created only once even if this
   is re-run for clone/target attributes.  */
17911       static bool complex_muldiv_init_p = false;
17913       /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble.  If
17914 	 we have clone or target attributes, this will be called a second
17915 	 time.  We want to create the built-in function only once.  */
17916      if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17918 	  complex_muldiv_init_p = true;
/* Derive the builtin codes for TCmode complex multiply/divide from the
   generic BUILT_IN_COMPLEX_{MUL,DIV}_MIN bases.  */
17919 	  built_in_function fncode_mul =
17920 	    (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17921 				 - MIN_MODE_COMPLEX_FLOAT);
17922 	  built_in_function fncode_div =
17923 	    (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17924 				 - MIN_MODE_COMPLEX_FLOAT);
17926 	  tree fntype = build_function_type_list (complex_long_double_type_node,
17927 						  long_double_type_node,
17928 						  long_double_type_node,
17929 						  long_double_type_node,
17930 						  long_double_type_node,
17933 	  create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17934 	  create_complex_muldiv ("__divkc3", fncode_div, fntype);
/* "kf" names: IEEE 128-bit values passed in vector registers.  */
17937       set_optab_libfunc (add_optab, mode, "__addkf3");
17938       set_optab_libfunc (sub_optab, mode, "__subkf3");
17939       set_optab_libfunc (neg_optab, mode, "__negkf2");
17940       set_optab_libfunc (smul_optab, mode, "__mulkf3");
17941       set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17942       set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17943       set_optab_libfunc (abs_optab, mode, "__abskf2");
17944       set_optab_libfunc (powi_optab, mode, "__powikf2");
17946       set_optab_libfunc (eq_optab, mode, "__eqkf2");
17947       set_optab_libfunc (ne_optab, mode, "__nekf2");
17948       set_optab_libfunc (gt_optab, mode, "__gtkf2");
17949       set_optab_libfunc (ge_optab, mode, "__gekf2");
17950       set_optab_libfunc (lt_optab, mode, "__ltkf2");
17951       set_optab_libfunc (le_optab, mode, "__lekf2");
17952       set_optab_libfunc (unord_optab, mode, "__unordkf2");
17954       set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17955       set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17956       set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17957       set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
/* IBM-double-double <-> IEEE 128-bit: note the optab direction is
   "extend"/"trunc" by mode size even though both are 128 bits wide.  */
17959       set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17960       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17961 	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17963       set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17964       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17965 	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17967       set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17968       set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17969       set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17970       set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17971       set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17972       set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
17974       set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17975       set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17976       set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17977       set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17979       set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17980       set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17981       set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17982       set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17984       if (TARGET_POWERPC64)
17986 	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17987 	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17988 	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17989 	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
/* No vector registers: fall back to the historical _q_* soft-float names.  */
17995       set_optab_libfunc (add_optab, mode, "_q_add");
17996       set_optab_libfunc (sub_optab, mode, "_q_sub");
17997       set_optab_libfunc (neg_optab, mode, "_q_neg");
17998       set_optab_libfunc (smul_optab, mode, "_q_mul");
17999       set_optab_libfunc (sdiv_optab, mode, "_q_div");
18000       if (TARGET_PPC_GPOPT)
18001 	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18003       set_optab_libfunc (eq_optab, mode, "_q_feq");
18004       set_optab_libfunc (ne_optab, mode, "_q_fne");
18005       set_optab_libfunc (gt_optab, mode, "_q_fgt");
18006       set_optab_libfunc (ge_optab, mode, "_q_fge");
18007       set_optab_libfunc (lt_optab, mode, "_q_flt");
18008       set_optab_libfunc (le_optab, mode, "_q_fle");
18010       set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18011       set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18012       set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18013       set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18014       set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18015       set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18016       set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18017       set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
/* Target hook: register all rs6000-specific runtime library function names.
   Installs the explicit __float128/__ibm128 routines when the types exist,
   then the 128-bit "long double" routines -- IBM double-double or IEEE
   128-bit depending on -mabi=ieeelongdouble.  */
18022 rs6000_init_libfuncs (void)
18024   /* __float128 support.  */
18025   if (TARGET_FLOAT128_TYPE)
18027       init_float128_ibm (IFmode);
18028       init_float128_ieee (KFmode);
18031   /* AIX/Darwin/64-bit Linux quad floating point routines.  */
18032   if (TARGET_LONG_DOUBLE_128)
18034       if (!TARGET_IEEEQUAD)
18035 	init_float128_ibm (TFmode);
18037       /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
18039 	init_float128_ieee (TFmode);
18043 /* Emit a potentially record-form instruction, setting DST from SRC.
18044    If DOT is 0, that is all; otherwise, set CCREG to the result of the
18045    signed comparison of DST with zero.  If DOT is 1, the generated RTL
18046    doesn't care about the DST result; if DOT is 2, it does.  If CCREG
18047    is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18048    a separate COMPARE.  */
18051 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
/* dot == 0: a plain move, no condition-register update.  */
18055       emit_move_insn (dst, src);
/* Record-form ("dot") insns can only set CR0; for any other CC register
   fall back to a separate move plus compare.  */
18059   if (cc_reg_not_cr0_operand (ccreg, CCmode))
18061       emit_move_insn (dst, src);
18062       emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18066   rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
/* dot == 1: GPR result unused, so clobber DST alongside the CC set.  */
18069       rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18070       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
/* dot == 2: both the CC set and the DST result are live.  */
18074       rtx set = gen_rtx_SET (dst, src);
18075       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
18080 /* A validation routine: say whether CODE, a condition code, and MODE
18081    match.  The other alternatives either don't make sense or should
18082    never be generated.  */
/* Asserts (rather than returns) that comparison CODE is legal for the CC
   mode MODE; any mismatch is an internal compiler error.  */
18085 validate_condition_mode (enum rtx_code code, machine_mode mode)
18087   gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18088 	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18089 	      && GET_MODE_CLASS (mode) == MODE_CC);
18091   /* These don't make sense.  */
/* Signed orderings are invalid on the unsigned CC mode, and vice versa.  */
18092   gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18093 	      || mode != CCUNSmode);
18095   gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18096 	      || mode == CCUNSmode);
/* Unordered-aware codes require the floating-point CC mode.  */
18098   gcc_assert (mode == CCFPmode
18099 	      || (code != ORDERED && code != UNORDERED
18100 		  && code != UNEQ && code != LTGT
18101 		  && code != UNGT && code != UNLT
18102 		  && code != UNGE && code != UNLE));
18104   /* These should never be generated except for
18105      flag_finite_math_only.  */
18106   gcc_assert (mode != CCFPmode
18107 	      || flag_finite_math_only
18108 	      || (code != LE && code != GE
18109 		  && code != UNEQ && code != LTGT
18110 		  && code != UNGT && code != UNLT));
18112   /* These are invalid; the information is not there.  */
18113   gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18117 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18118    rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
18119    not zero, store there the bit offset (counted from the right) where
18120    the single stretch of 1 bits begins; and similarly for B, the bit
18121    offset where it ends.  */
/* NOTE(review): several lines are elided in this excerpt, including the
   `bit = ...' computations feeding the exact_log2 calls, the early
   returns, and the final stores through B and E.  */
18124 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18126   unsigned HOST_WIDE_INT val = INTVAL (mask);
18127   unsigned HOST_WIDE_INT bit;
18129   int n = GET_MODE_PRECISION (mode);
/* Rotate-and-mask insns exist only for 32- and 64-bit modes.  */
18131   if (mode != DImode && mode != SImode)
/* Non-negative VAL: the run of ones (if any) does not touch the sign bit;
   exact_log2 yields -1 unless the corresponding value is a power of 2,
   which is what makes a single contiguous run detectable.  */
18134   if (INTVAL (mask) >= 0)
18137       ne = exact_log2 (bit);
18138       nb = exact_log2 (val + bit);
/* VAL == all-ones is handled specially (wraps the whole register).  */
18140   else if (val + 1 == 0)
18149       nb = exact_log2 (bit);
18150       ne = exact_log2 (val + bit);
18155       ne = exact_log2 (bit);
18156       if (val + bit == 0)
/* Reject if either end was not found or lies outside the mode width.  */
18164   if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18175 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18176    or rldicr instruction, to implement an AND with it in mode MODE.  */
18179 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
/* First require a single contiguous run of ones; nb/ne get its ends.  */
18183   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18186   /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
/* rldicl clears from the left (ne == 0), rldicr from the right (nb == 63);
   otherwise a 32-bit rlwinm can be used if the run fits the low word.  */
18188   if (mode == DImode)
18189     return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18191   /* For SImode, rlwinm can do everything.  */
18192   if (mode == SImode)
18193     return (nb < 32 && ne < 32);
18198 /* Return the instruction template for an AND with mask in mode MODE, with
18199    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
/* Caller must have validated the mask with rs6000_is_valid_and_mask;
   OPERANDS[3] (and [4] for rlwinm) are overwritten with shift amounts.  */
18202 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18206   if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18207     gcc_unreachable ();
/* Run reaches bit 0: clear the high bits with rldicl.  */
18209   if (mode == DImode && ne == 0)
18211       operands[3] = GEN_INT (63 - nb);
18213 	return "rldicl. %0,%1,0,%3";
18214       return "rldicl %0,%1,0,%3";
/* Run reaches bit 63: clear the low bits with rldicr.  */
18217   if (mode == DImode && nb == 63)
18219       operands[3] = GEN_INT (63 - ne);
18221 	return "rldicr. %0,%1,0,%3";
18222       return "rldicr %0,%1,0,%3";
/* Run fits in the low 32 bits: a rotate-left-word-and-mask does it.  */
18225   if (nb < 32 && ne < 32)
18227       operands[3] = GEN_INT (31 - nb);
18228       operands[4] = GEN_INT (31 - ne);
18230 	return "rlwinm. %0,%1,0,%3,%4";
18231       return "rlwinm %0,%1,0,%3,%4";
18234   gcc_unreachable ();
18237 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18238    rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18239    shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */
/* NOTE(review): several lines are elided in this excerpt (the sh = -1
   variable-shift fallback, the `code = ROTATE' conversions, and the
   wrap-around / variable-shift early returns).  */
18242 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18246   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18249   int n = GET_MODE_PRECISION (mode);
/* A constant shift amount must lie inside the mode; otherwise treat the
   shift amount as variable (elided branch).  */
18252   if (CONST_INT_P (XEXP (shift, 1)))
18254       sh = INTVAL (XEXP (shift, 1));
18255       if (sh < 0 || sh >= n)
18259   rtx_code code = GET_CODE (shift);
18261   /* Convert any shift by 0 to a rotate, to simplify below code.  */
18265   /* Convert rotate to simple shift if we can, to make analysis simpler.  */
18266   if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18268   if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18274   /* DImode rotates need rld*.  */
18275   if (mode == DImode && code == ROTATE)
18276     return (nb == 63 || ne == 0 || ne == sh);
18278   /* SImode rotates need rlw*.  */
18279   if (mode == SImode && code == ROTATE)
18280     return (nb < 32 && ne < 32 && sh < 32);
18282   /* Wrap-around masks are only okay for rotates.  */
18286   /* Variable shifts are only okay for rotates.  */
18290   /* Don't allow ASHIFT if the mask is wrong for that.  */
18291   if (code == ASHIFT && ne < sh)
18294   /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
18295      if the mask is wrong for that.  */
18296   if (nb < 32 && ne < 32 && sh < 32
18297       && !(code == LSHIFTRT && nb >= 32 - sh))
18300   /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
18301      if the mask is wrong for that.  */
18302   if (code == LSHIFTRT)
18304   if (nb == 63 || ne == 0 || ne == sh)
18305     return !(code == LSHIFTRT && nb >= sh);
18310 /* Return the instruction template for a shift with mask in mode MODE, with
18311    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
/* OPERANDS[2] is the shift amount (rewritten for right shifts), [3] the
   mask constant on entry (rewritten to the mask-begin operand), and [4]
   the original shift rtx (rewritten to mask-end for rlwinm).  */
18314 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18318   if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18319     gcc_unreachable ();
/* Mask reaching bit 0: rld[i]cl.  A right shift by N is emitted as a
   rotate left by 64-N, hence the operand rewrite.  */
18321   if (mode == DImode && ne == 0)
18323       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18324 	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18325       operands[3] = GEN_INT (63 - nb);
18327 	return "rld%I2cl. %0,%1,%2,%3";
18328       return "rld%I2cl %0,%1,%2,%3";
/* Mask reaching bit 63: rld[i]cr.  */
18331   if (mode == DImode && nb == 63)
18333       operands[3] = GEN_INT (63 - ne);
18335 	return "rld%I2cr. %0,%1,%2,%3";
18336       return "rld%I2cr %0,%1,%2,%3";
/* Interior DImode mask whose low end equals the shift amount: rld[i]c.  */
18340       && GET_CODE (operands[4]) != LSHIFTRT
18341       && CONST_INT_P (operands[2])
18342       && ne == INTVAL (operands[2]))
18344       operands[3] = GEN_INT (63 - nb);
18346 	return "rld%I2c. %0,%1,%2,%3";
18347       return "rld%I2c %0,%1,%2,%3";
18350   if (nb < 32 && ne < 32)
18352       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18353 	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18354       operands[3] = GEN_INT (31 - nb);
18355       operands[4] = GEN_INT (31 - ne);
18356       /* This insn can also be a 64-bit rotate with mask that really makes
18357 	 it just a shift right (with mask); the %h below are to adjust for
18358 	 that situation (shift count is >= 32 in that case).  */
18360 	return "rlw%I2nm. %0,%1,%h2,%3,%4";
18361       return "rlw%I2nm %0,%1,%h2,%3,%4";
18364   gcc_unreachable ();
18367 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18368    rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18369    ASHIFT, or LSHIFTRT) in mode MODE.  */
/* NOTE(review): this mirrors rs6000_is_valid_shift_mask above; the elided
   lines here (code = ROTATE conversions, wrap-around early return) are
   presumably the same shape -- confirm against the full source.  */
18372 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18376   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18379   int n = GET_MODE_PRECISION (mode);
18381   int sh = INTVAL (XEXP (shift, 1));
18382   if (sh < 0 || sh >= n)
18385   rtx_code code = GET_CODE (shift);
18387   /* Convert any shift by 0 to a rotate, to simplify below code.  */
18391   /* Convert rotate to simple shift if we can, to make analysis simpler.  */
18392   if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18394   if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18400   /* DImode rotates need rldimi.  */
18401   if (mode == DImode && code == ROTATE)
18404   /* SImode rotates need rlwimi.  */
18405   if (mode == SImode && code == ROTATE)
18406     return (nb < 32 && ne < 32 && sh < 32);
18408   /* Wrap-around masks are only okay for rotates.  */
18412   /* Don't allow ASHIFT if the mask is wrong for that.  */
18413   if (code == ASHIFT && ne < sh)
18416   /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
18417      if the mask is wrong for that.  */
18418   if (nb < 32 && ne < 32 && sh < 32
18419       && !(code == LSHIFTRT && nb >= 32 - sh))
18422   /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
18423      if the mask is wrong for that.  */
18424   if (code == LSHIFTRT)
18427     return !(code == LSHIFTRT && nb >= sh);
18432 /* Return the instruction template for an insert with mask in mode MODE, with
18433    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
/* OPERANDS[2] is the shift amount, [3] the mask constant on entry
   (rewritten to the mask-begin operand), [4] the shift rtx (rewritten to
   the mask-end operand for rlwimi).  */
18436 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18440   if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18441     gcc_unreachable ();
18443   /* Prefer rldimi because rlwimi is cracked.  */
18444   if (TARGET_POWERPC64
18445       && (!dot || mode == DImode)
18446       && GET_CODE (operands[4]) != LSHIFTRT
18447       && ne == INTVAL (operands[2]))
18449       operands[3] = GEN_INT (63 - nb);
18451 	return "rldimi. %0,%1,%2,%3";
18452       return "rldimi %0,%1,%2,%3";
18455   if (nb < 32 && ne < 32)
/* Right shift by N becomes rotate left by 32-N for rlwimi.  */
18457       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18458 	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18459       operands[3] = GEN_INT (31 - nb);
18460       operands[4] = GEN_INT (31 - ne);
18462 	return "rlwimi. %0,%1,%2,%3,%4";
18463       return "rlwimi %0,%1,%2,%3,%4";
18466   gcc_unreachable ();
18469 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18470    using two machine instructions.  */
18473 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18475   /* There are two kinds of AND we can handle with two insns:
18476      1) those we can do with two rl* insn;
18479      We do not handle that last case yet.  */
18481   /* If there is just one stretch of ones, we can do it.  */
18482   if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18485   /* Otherwise, fill in the lowest "hole"; if we can do the result with
18486      one insn, we can do the whole thing with two.  */
18487   unsigned HOST_WIDE_INT val = INTVAL (c);
/* bit1 = lowest set bit; bit2 = lowest clear bit above bit1 (start of the
   lowest "hole"); val1 = val with its lowest run of ones removed;
   bit3 = lowest set bit above the hole (end of the hole).  */
18488   unsigned HOST_WIDE_INT bit1 = val & -val;
18489   unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18490   unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18491   unsigned HOST_WIDE_INT bit3 = val1 & -val1;
/* val + bit3 - bit2 is VAL with the lowest hole filled with ones.  */
18492   return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18495 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18496 If EXPAND is true, split rotate-and-mask instructions we generate to
18497 their constituent parts as well (this is used during expand); if DOT
18498 is 1, make the last insn a record-form instruction clobbering the
18499 destination GPR and setting the CC reg (from operands[3]); if 2, set
18500 that GPR as well as the CC reg. */
18503 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
/* The record form cannot be requested at expand time.  */
18505 gcc_assert (!(expand && dot));
18507 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18509 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18510 shift right. This generates better code than doing the masks without
18511 shifts, or shifting first right and then left. */
18513 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18515 gcc_assert (mode == DImode);
18517 int shift = 63 - nb;
/* Expand-time variant: emit the three steps as separate insns.  */
18520 rtx tmp1 = gen_reg_rtx (DImode);
18521 rtx tmp2 = gen_reg_rtx (DImode);
18522 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18523 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18524 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
/* Split-time variant: build the RTL in place, with the final shift
   optionally made a record-form (dot) insn.  */
18528 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18529 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18530 emit_move_insn (operands[0], tmp);
18531 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18532 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18537 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18538 that does the rest. */
18539 unsigned HOST_WIDE_INT bit1 = val & -val;
18540 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18541 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18542 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
/* mask1 is all ones outside the hole [bit2, bit3); mask2 is VAL with
   that hole filled in (a single stretch of ones, asserted below).  */
18544 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18545 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18547 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18549 /* Two "no-rotate"-and-mask instructions, for SImode. */
18550 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18552 gcc_assert (mode == SImode);
18554 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18555 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18556 emit_move_insn (reg, tmp);
18557 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18558 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18562 gcc_assert (mode == DImode);
18564 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18565 insns; we have to do the first in SImode, because it wraps. */
18566 if (mask2 <= 0xffffffff
18567 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18569 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18570 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18572 rtx reg_low = gen_lowpart (SImode, reg);
18573 emit_move_insn (reg_low, tmp);
18574 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18575 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18579 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18580 at the top end), rotate back and clear the other hole. */
18581 int right = exact_log2 (bit3);
18582 int left = 64 - right;
18584 /* Rotate the mask too. */
18585 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
/* Expand-time variant with explicit temporaries.  */
18589 rtx tmp1 = gen_reg_rtx (DImode);
18590 rtx tmp2 = gen_reg_rtx (DImode);
18591 rtx tmp3 = gen_reg_rtx (DImode);
18592 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18593 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18594 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18595 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
/* Split-time variant; the final rotate-and-mask may be record form.  */
18599 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18600 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18601 emit_move_insn (operands[0], tmp);
18602 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18603 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18604 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18608 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
18609 for lfq and stfq insns iff the registers are hard registers. */
18612 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18614 /* We might have been passed a SUBREG. */
18615 if (!REG_P (reg1) || !REG_P (reg2))
18618 /* We might have been passed non floating point registers. */
18619 if (!FP_REGNO_P (REGNO (reg1))
18620 || !FP_REGNO_P (REGNO (reg2)))
/* Candidates only when the two FPRs are consecutive.  */
18623 return (REGNO (reg1) == REGNO (reg2) - 1);
18626 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18627 addr1 and addr2 must be in consecutive memory locations
18628 (addr2 == addr1 + 8). */
/* Both MEM operands must use a plain register or reg+const address,
   with the same base register and offsets exactly 8 apart.  */
18631 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18634 unsigned int reg1, reg2;
18635 int offset1, offset2;
18637 /* The mems cannot be volatile. */
18638 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18641 addr1 = XEXP (mem1, 0);
18642 addr2 = XEXP (mem2, 0);
18644 /* Extract an offset (if used) from the first addr. */
18645 if (GET_CODE (addr1) == PLUS)
18647 /* If not a REG, return zero. */
18648 if (!REG_P (XEXP (addr1, 0)))
18652 reg1 = REGNO (XEXP (addr1, 0));
18653 /* The offset must be constant! */
18654 if (!CONST_INT_P (XEXP (addr1, 1)))
18656 offset1 = INTVAL (XEXP (addr1, 1));
18659 else if (!REG_P (addr1))
18663 reg1 = REGNO (addr1);
18664 /* This was a simple (mem (reg)) expression. Offset is 0. */
18668 /* And now for the second addr. */
18669 if (GET_CODE (addr2) == PLUS)
18671 /* If not a REG, return zero. */
18672 if (!REG_P (XEXP (addr2, 0)))
18676 reg2 = REGNO (XEXP (addr2, 0))
18677 /* The offset must be constant. */
18678 if (!CONST_INT_P (XEXP (addr2, 1)))
18680 offset2 = INTVAL (XEXP (addr2, 1));
18683 else if (!REG_P (addr2))
18687 reg2 = REGNO (addr2);
18688 /* This was a simple (mem (reg)) expression. Offset is 0. */
18692 /* Both of these must have the same base register. */
18696 /* The offset for the second addr must be 8 more than the first addr. */
18697 if (offset2 != offset1 + 8)
18700 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18705 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
18706 need to use DDmode, in all other cases we can use the same mode. */
18707 static machine_mode
18708 rs6000_secondary_memory_needed_mode (machine_mode mode)
/* NOTE(review): the widening is done only while LRA is running;
   presumably to give the 4-byte SD value a DD-sized memory slot usable
   by both register files -- confirm against the secondary-memory
   infrastructure.  */
18710 if (lra_in_progress && mode == SDmode)
18715 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18716 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18717 only work on the traditional altivec registers, note if an altivec register
18720 static enum rs6000_reg_type
18721 register_to_reg_type (rtx reg, bool *is_altivec)
18723 HOST_WIDE_INT regno;
18724 enum reg_class rclass;
/* Look through a SUBREG to the underlying register.  */
18726 if (SUBREG_P (reg))
18727 reg = SUBREG_REG (reg);
18730 return NO_REG_TYPE;
18732 regno = REGNO (reg);
18733 if (!HARD_REGISTER_NUM_P (regno))
/* Before register allocation a pseudo has no fixed class yet.  */
18735 if (!lra_in_progress && !reload_completed)
18736 return PSEUDO_REG_TYPE;
/* During/after reload, try to resolve the pseudo to its assigned hard
   register; still-unassigned pseudos stay PSEUDO_REG_TYPE.  */
18738 regno = true_regnum (reg);
18739 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18740 return PSEUDO_REG_TYPE;
18743 gcc_assert (regno >= 0);
/* Report Altivec registers to the caller when requested.  */
18745 if (is_altivec && ALTIVEC_REGNO_P (regno))
18746 *is_altivec = true;
18748 rclass = rs6000_regno_regclass[regno];
18749 return reg_class_to_reg_type[(int)rclass];
18752 /* Helper function to return the cost of adding a TOC entry address. */
18755 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
/* Non-small code model: cost 1 when RELOAD_REG_OFFSET is clear in
   ADDR_MASK, else 2.  Small code model: 6 under -mminimal-toc, else 3.  */
18759 if (TARGET_CMODEL != CMODEL_SMALL)
18760 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18763 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18768 /* Helper function for rs6000_secondary_reload to determine whether the memory
18769 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18770 needs reloading. Return negative if the memory is not handled by the memory
18771 helper functions and to try a different reload method, 0 if no additional
18772 instructions are need, and positive to give the extra cost for the
18776 rs6000_secondary_reload_memory (rtx addr,
18777 enum reg_class rclass,
18780 int extra_cost = 0;
18781 rtx reg, and_arg, plus_arg0, plus_arg1;
18782 addr_mask_type addr_mask;
/* TYPE names the reload case (for debug output); FAIL_MSG names the
   reason when the address cannot be handled here.  */
18783 const char *type = NULL;
18784 const char *fail_msg = NULL;
/* Pick the addressing-capability mask for the register class that will
   hold the value.  */
18786 if (GPR_REG_CLASS_P (rclass))
18787 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18789 else if (rclass == FLOAT_REGS)
18790 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18792 else if (rclass == ALTIVEC_REGS)
18793 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18795 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18796 else if (rclass == VSX_REGS)
18797 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18798 & ~RELOAD_REG_AND_M16);
18800 /* If the register allocator hasn't made up its mind yet on the register
18801 class to use, settle on defaults to use. */
18802 else if (rclass == NO_REGS)
18804 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18805 & ~RELOAD_REG_AND_M16);
/* A value needing several registers cannot use indexed or auto-update
   address forms.  */
18807 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18808 addr_mask &= ~(RELOAD_REG_INDEXED
18809 | RELOAD_REG_PRE_INCDEC
18810 | RELOAD_REG_PRE_MODIFY);
18816 /* If the register isn't valid in this register class, just return now. */
18817 if ((addr_mask & RELOAD_REG_VALID) == 0)
18819 if (TARGET_DEBUG_ADDR)
18822 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18823 "not valid in class\n",
18824 GET_MODE_NAME (mode), reg_class_names[rclass]);
18831 switch (GET_CODE (addr))
18833 /* Does the register class supports auto update forms for this mode? We
18834 don't need a scratch register, since the powerpc only supports
18835 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18838 reg = XEXP (addr, 0);
18839 if (!base_reg_operand (addr, GET_MODE (reg)))
18841 fail_msg = "no base register #1";
18845 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
/* PRE_MODIFY must be (pre_modify reg (plus reg ...)) with the same
   base register in both positions.  */
18853 reg = XEXP (addr, 0);
18854 plus_arg1 = XEXP (addr, 1);
18855 if (!base_reg_operand (reg, GET_MODE (reg))
18856 || GET_CODE (plus_arg1) != PLUS
18857 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18859 fail_msg = "bad PRE_MODIFY";
18863 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18870 /* Do we need to simulate AND -16 to clear the bottom address bits used
18871 in VMX load/stores? Only allow the AND for vector sizes. */
18873 and_arg = XEXP (addr, 0);
18874 if (GET_MODE_SIZE (mode) != 16
18875 || !CONST_INT_P (XEXP (addr, 1))
18876 || INTVAL (XEXP (addr, 1)) != -16)
18878 fail_msg = "bad Altivec AND #1";
18882 if (rclass != ALTIVEC_REGS)
18884 if (legitimate_indirect_address_p (and_arg, false))
18887 else if (legitimate_indexed_address_p (and_arg, false))
18892 fail_msg = "bad Altivec AND #2";
18900 /* If this is an indirect address, make sure it is a base register. */
18903 if (!legitimate_indirect_address_p (addr, false))
18910 /* If this is an indexed address, make sure the register class can handle
18911 indexed addresses for this mode. */
18913 plus_arg0 = XEXP (addr, 0);
18914 plus_arg1 = XEXP (addr, 1);
18916 /* (plus (plus (reg) (constant)) (constant)) is generated during
18917 push_reload processing, so handle it now. */
18918 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18920 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18927 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18928 push_reload processing, so handle it now. */
18929 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18931 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18934 type = "indexed #2";
18938 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18940 fail_msg = "no base register #2";
18944 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18946 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18947 || !legitimate_indexed_address_p (addr, false))
/* ISA 3.0 vector d-form (quad) offsets must satisfy the stricter
   quad_address_offset_p alignment/range check.  */
18954 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18955 && CONST_INT_P (plus_arg1))
18957 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18960 type = "vector d-form offset";
18964 /* Make sure the register class can handle offset addresses. */
18965 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18967 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18970 type = "offset #2";
18976 fail_msg = "bad PLUS";
18983 /* Quad offsets are restricted and can't handle normal addresses. */
18984 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18987 type = "vector d-form lo_sum";
18990 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18992 fail_msg = "bad LO_SUM";
18996 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19003 /* Static addresses need to create a TOC entry. */
19007 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19010 type = "vector d-form lo_sum #2";
19016 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19020 /* TOC references look like offsetable memory. */
19022 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19024 fail_msg = "bad UNSPEC";
19028 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19031 type = "vector d-form lo_sum #3";
19034 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19037 type = "toc reference";
19043 fail_msg = "bad address";
19048 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19050 if (extra_cost < 0)
19052 "rs6000_secondary_reload_memory error: mode = %s, "
19053 "class = %s, addr_mask = '%s', %s\n",
19054 GET_MODE_NAME (mode),
19055 reg_class_names[rclass],
19056 rs6000_debug_addr_mask (addr_mask, false),
19057 (fail_msg != NULL) ? fail_msg : "<bad address>");
19061 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19062 "addr_mask = '%s', extra cost = %d, %s\n",
19063 GET_MODE_NAME (mode),
19064 reg_class_names[rclass],
19065 rs6000_debug_addr_mask (addr_mask, false),
19067 (type) ? type : "<none>");
19075 /* Helper function for rs6000_secondary_reload to return true if a move to a
19076 different register class is really a simple move. */
19079 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19080 enum rs6000_reg_type from_type,
19083 int size = GET_MODE_SIZE (mode);
19085 /* Add support for various direct moves available. In this function, we only
19086 look at cases where we don't need any extra registers, and one or more
19087 simple move insns are issued. Originally small integers are not allowed
19088 in FPR/VSX registers. Single precision binary floating is not a simple
19089 move because we need to convert to the single precision memory layout.
19090 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19091 need special direct move handling, which we do not support yet. */
19092 if (TARGET_DIRECT_MOVE
19093 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19094 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19096 if (TARGET_POWERPC64)
19098 /* ISA 2.07: MTVSRD or MFVSRD. */
19102 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19103 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19107 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19108 if (TARGET_P8_VECTOR)
19110 if (mode == SImode)
19113 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19117 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19118 if (mode == SDmode)
19122 /* Power6+: MFTGPR or MFFGPR. */
19123 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19124 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19125 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19128 /* Move to/from SPR. */
19129 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19130 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19131 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19137 /* Direct move helper function for rs6000_secondary_reload, handle all of the
19138 special direct moves that involve allocating an extra register, return the
19139 insn code of the helper function if there is such a function or
19140 CODE_FOR_nothing if not. */
19143 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19144 enum rs6000_reg_type from_type,
19146 secondary_reload_info *sri,
19150 enum insn_code icode = CODE_FOR_nothing;
19152 int size = GET_MODE_SIZE (mode);
19154 if (TARGET_POWERPC64 && size == 16)
19156 /* Handle moving 128-bit values from GPRs to VSX point registers on
19157 ISA 2.07 (power8, power9) when running in 64-bit mode using
19158 XXPERMDI to glue the two 64-bit values back together. */
19159 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19161 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19162 icode = reg_addr[mode].reload_vsx_gpr;
19165 /* Handle moving 128-bit values from VSX point registers to GPRs on
19166 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19167 bottom 64-bit value. */
19168 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19170 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19171 icode = reg_addr[mode].reload_gpr_vsx;
/* SFmode needs a layout conversion in addition to the raw move.  */
19175 else if (TARGET_POWERPC64 && mode == SFmode)
19177 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19179 cost = 3; /* xscvdpspn, mfvsrd, and. */
19180 icode = reg_addr[mode].reload_gpr_vsx;
19183 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19185 cost = 2; /* mtvsrz, xscvspdpn. */
19186 icode = reg_addr[mode].reload_vsx_gpr;
19190 else if (!TARGET_POWERPC64 && size == 8)
19192 /* Handle moving 64-bit values from GPRs to floating point registers on
19193 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19194 32-bit values back together. Altivec register classes must be handled
19195 specially since a different instruction is used, and the secondary
19196 reload support requires a single instruction class in the scratch
19197 register constraint. However, right now TFmode is not allowed in
19198 Altivec registers, so the pattern will never match. */
19199 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19201 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19202 icode = reg_addr[mode].reload_fpr_gpr;
/* If a helper pattern was found, record it and its cost in SRI.  */
19206 if (icode != CODE_FOR_nothing)
19211 sri->icode = icode;
19212 sri->extra_cost = cost;
19219 /* Return whether a move between two register classes can be done either
19220 directly (simple move) or via a pattern that uses a single extra temporary
19221 (using ISA 2.07's direct move in this case). */
19224 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19225 enum rs6000_reg_type from_type,
19227 secondary_reload_info *sri,
19230 /* Fall back to load/store reloads if either type is not a register. */
19231 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19234 /* If we haven't allocated registers yet, assume the move can be done for the
19235 standard register types. */
19236 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19237 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19238 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19241 /* Moves to the same set of registers is a simple move for non-specialized
19243 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19246 /* Check whether a simple move can be done directly. */
19247 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
/* Simple moves need no helper pattern and no extra cost.  */
19251 sri->icode = CODE_FOR_nothing;
19252 sri->extra_cost = 0;
19257 /* Now check if we can do it in a few steps. */
19258 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19262 /* Inform reload about cases where moving X with a mode MODE to a register in
19263 RCLASS requires an extra scratch or immediate register. Return the class
19264 needed for the immediate register.
19266 For VSX and Altivec, we may need a register to convert sp+offset into
19269 For misaligned 64-bit gpr loads and stores we need a register to
19270 convert an offset address to indirect. */
19273 rs6000_secondary_reload (bool in_p,
19275 reg_class_t rclass_i,
19277 secondary_reload_info *sri)
19279 enum reg_class rclass = (enum reg_class) rclass_i;
19280 reg_class_t ret = ALL_REGS;
19281 enum insn_code icode;
19282 bool default_p = false;
19283 bool done_p = false;
19285 /* Allow subreg of memory before/during reload. */
19286 bool memory_p = (MEM_P (x)
19287 || (!reload_completed && SUBREG_P (x)
19288 && MEM_P (SUBREG_REG (x))));
19290 sri->icode = CODE_FOR_nothing;
19291 sri->t_icode = CODE_FOR_nothing;
19292 sri->extra_cost = 0;
/* Select the reload helper pattern for a load or a store of MODE.  */
19294 ? reg_addr[mode].reload_load
19295 : reg_addr[mode].reload_store);
19297 if (REG_P (x) || register_operand (x, mode))
19299 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19300 bool altivec_p = (rclass == ALTIVEC_REGS);
19301 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
/* For a store the transfer direction is reversed.  */
19304 std::swap (to_type, from_type);
19306 /* Can we do a direct move of some sort? */
19307 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19310 icode = (enum insn_code)sri->icode;
19317 /* Make sure 0.0 is not reloaded or forced into memory. */
19318 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19325 /* If this is a scalar floating point value and we want to load it into the
19326 traditional Altivec registers, do it via a move via a traditional floating
19327 point register, unless we have D-form addressing. Also make sure that
19328 non-zero constants use a FPR. */
19329 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19330 && !mode_supports_vmx_dform (mode)
19331 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19332 && (memory_p || CONST_DOUBLE_P (x)))
19339 /* Handle reload of load/stores if we have reload helper functions. */
19340 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19342 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
/* Negative cost means the address is not handled by the helpers.  */
19345 if (extra_cost >= 0)
19349 if (extra_cost > 0)
19351 sri->extra_cost = extra_cost;
19352 sri->icode = icode;
19357 /* Handle unaligned loads and stores of integer registers. */
19358 if (!done_p && TARGET_POWERPC64
19359 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19361 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19363 rtx addr = XEXP (x, 0);
19364 rtx off = address_offset (addr);
19366 if (off != NULL_RTX)
19368 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19369 unsigned HOST_WIDE_INT offset = INTVAL (off);
19371 /* We need a secondary reload when our legitimate_address_p
19372 says the address is good (as otherwise the entire address
19373 will be reloaded), and the offset is not a multiple of
19374 four or we have an address wrap. Address wrap will only
19375 occur for LO_SUMs since legitimate_offset_address_p
19376 rejects addresses for 16-byte mems that will wrap. */
19377 if (GET_CODE (addr) == LO_SUM
19378 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19379 && ((offset & 3) != 0
19380 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19381 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19382 && (offset & 3) != 0))
19384 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19386 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19387 : CODE_FOR_reload_di_load);
19389 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19390 : CODE_FOR_reload_di_store);
19391 sri->extra_cost = 2;
/* 32-bit multi-word GPR loads/stores whose offsets wrap the 16-bit
   displacement field.  */
19402 if (!done_p && !TARGET_POWERPC64
19403 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19405 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19407 rtx addr = XEXP (x, 0);
19408 rtx off = address_offset (addr);
19410 if (off != NULL_RTX)
19412 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19413 unsigned HOST_WIDE_INT offset = INTVAL (off);
19415 /* We need a secondary reload when our legitimate_address_p
19416 says the address is good (as otherwise the entire address
19417 will be reloaded), and we have a wrap.
19419 legitimate_lo_sum_address_p allows LO_SUM addresses to
19420 have any offset so test for wrap in the low 16 bits.
19422 legitimate_offset_address_p checks for the range
19423 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19424 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19425 [0x7ff4,0x7fff] respectively, so test for the
19426 intersection of these ranges, [0x7ffc,0x7fff] and
19427 [0x7ff4,0x7ff7] respectively.
19429 Note that the address we see here may have been
19430 manipulated by legitimize_reload_address. */
19431 if (GET_CODE (addr) == LO_SUM
19432 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19433 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19436 sri->icode = CODE_FOR_reload_si_load;
19438 sri->icode = CODE_FOR_reload_si_store;
19439 sri->extra_cost = 2;
19454 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19456 gcc_assert (ret != ALL_REGS);
19458 if (TARGET_DEBUG_ADDR)
19461 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19463 reg_class_names[ret],
19464 in_p ? "true" : "false",
19465 reg_class_names[rclass],
19466 GET_MODE_NAME (mode));
19468 if (reload_completed)
19469 fputs (", after reload", stderr);
19472 fputs (", done_p not set", stderr);
19475 fputs (", default secondary reload", stderr);
19477 if (sri->icode != CODE_FOR_nothing)
19478 fprintf (stderr, ", reload func = %s, extra cost = %d",
19479 insn_data[sri->icode].name, sri->extra_cost);
19481 else if (sri->extra_cost > 0)
19482 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19484 fputs ("\n", stderr);
19491 /* Better tracing for rs6000_secondary_reload_inner. */
19494 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19499 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19501 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19502 store_p ? "store" : "load");
/* Dump the reload as a PARALLEL of the move and the scratch clobber.  */
19505 set = gen_rtx_SET (mem, reg);
19507 set = gen_rtx_SET (reg, mem);
19509 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19510 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19513 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19514 ATTRIBUTE_NORETURN;
/* Trace the failing reload, then abort (declared noreturn above).  */
19517 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19520 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19521 gcc_unreachable ();
19524 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19525 reload helper functions. These were identified in
19526 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19527 reload, it calls the insns:
19528 reload_<RELOAD:mode>_<P:mptrsize>_store
19529 reload_<RELOAD:mode>_<P:mptrsize>_load
19531 which in turn calls this function, to do whatever is necessary to create
19532 valid addresses. */
19535 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19537 int regno = true_regnum (reg);
19538 machine_mode mode = GET_MODE (reg);
19539 addr_mask_type addr_mask;
19542 rtx op_reg, op0, op1;
/* Sanity checks: REG must resolve to a hard register, MEM must be a
   memory, and SCRATCH must be usable as a base register.  */
19547 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
19548 || !base_reg_operand (scratch, GET_MODE (scratch)))
19549 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Pick the addressing-capability mask for the register file REG
   resolved to.  */
19551 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19552 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19554 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19555 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19557 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19558 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19561 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19563 /* Make sure the mode is valid in this register class. */
19564 if ((addr_mask & RELOAD_REG_VALID) == 0)
19565 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19567 if (TARGET_DEBUG_ADDR)
19568 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19570 new_addr = addr = XEXP (mem, 0);
19571 switch (GET_CODE (addr))
19573 /* Does the register class support auto update forms for this mode? If
19574 not, do the update now. We don't need a scratch register, since the
19575 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19578 op_reg = XEXP (addr, 0);
19579 if (!base_reg_operand (op_reg, Pmode))
19580 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19582 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
/* Emit the increment/decrement of the base register explicitly.  */
19584 int delta = GET_MODE_SIZE (mode);
19585 if (GET_CODE (addr) == PRE_DEC)
19587 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
19593 op0 = XEXP (addr, 0);
19594 op1 = XEXP (addr, 1);
19595 if (!base_reg_operand (op0, Pmode)
19596 || GET_CODE (op1) != PLUS
19597 || !rtx_equal_p (op0, XEXP (op1, 0)))
19598 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19600 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19602 emit_insn (gen_rtx_SET (op0, op1));
19607 /* Do we need to simulate AND -16 to clear the bottom address bits used
19608 in VMX load/stores? */
19610 op0 = XEXP (addr, 0);
19611 op1 = XEXP (addr, 1);
19612 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19614 if (REG_P (op0) || SUBREG_P (op0))
19617 else if (GET_CODE (op1) == PLUS)
19619 emit_insn (gen_rtx_SET (scratch, op1));
19624 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Emit the AND into the scratch register (clobbering CC) and address
   the memory through the scratch.  */
19626 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19627 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19628 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19629 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19630 new_addr = scratch;
19634 /* If this is an indirect address, make sure it is a base register. */
19637 if (!base_reg_operand (addr, GET_MODE (addr)))
19639 emit_insn (gen_rtx_SET (scratch, addr));
19640 new_addr = scratch;
19644 /* If this is an indexed address, make sure the register class can handle
19645 indexed addresses for this mode. */
19647 op0 = XEXP (addr, 0);
19648 op1 = XEXP (addr, 1);
19649 if (!base_reg_operand (op0, Pmode))
19650 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19652 else if (int_reg_operand (op1, Pmode))
19654 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19656 emit_insn (gen_rtx_SET (scratch, addr));
19657 new_addr = scratch;
/* D-form (quad) offsets have stricter alignment; force any address
   that fails quad_address_p into the scratch register.  */
19661 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19663 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19664 || !quad_address_p (addr, mode, false))
19666 emit_insn (gen_rtx_SET (scratch, addr));
19667 new_addr = scratch;
19671 /* Make sure the register class can handle offset addresses. */
19672 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19674 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19676 emit_insn (gen_rtx_SET (scratch, addr));
19677 new_addr = scratch;
19682 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19687 op0 = XEXP (addr, 0);
19688 op1 = XEXP (addr, 1);
19689 if (!base_reg_operand (op0, Pmode))
19690 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19692 else if (int_reg_operand (op1, Pmode))
19694 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19696 emit_insn (gen_rtx_SET (scratch, addr));
19697 new_addr = scratch;
19701 /* Quad offsets are restricted and can't handle normal addresses. */
19702 else if (mode_supports_dq_form (mode))
19704 emit_insn (gen_rtx_SET (scratch, addr));
19705 new_addr = scratch;
19708 /* Make sure the register class can handle offset addresses. */
19709 else if (legitimate_lo_sum_address_p (mode, addr, false))
19711 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19713 emit_insn (gen_rtx_SET (scratch, addr));
19714 new_addr = scratch;
19719 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Anything else: materialize the whole address in the scratch.  */
19726 rs6000_emit_move (scratch, addr, Pmode);
19727 new_addr = scratch;
19731 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19734 /* Adjust the address if it changed. */
19735 if (addr != new_addr)
19737 mem = replace_equiv_address_nv (mem, new_addr);
19738 if (TARGET_DEBUG_ADDR)
19739 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19742 /* Now create the move. */
19744 emit_insn (gen_rtx_SET (mem, reg));
19746 emit_insn (gen_rtx_SET (reg, mem));
19751 /* Convert reloads involving 64-bit gprs and misaligned offset
19752 addressing, or multiple 32-bit gprs and offsets that are too large,
19753 to use indirect addressing. */
/* REG is the GPR being reloaded, MEM the memory operand, SCRATCH a spare
   base register, and STORE_P says whether this is a store (reg -> mem)
   or a load (mem -> reg).  The unusable address is forced into SCRATCH
   (or into the PRE_MODIFY base register) so the move can be emitted.
   NOTE(review): this extraction is missing several lines (e.g. the
   debug_rtx calls for REG and MEM, braces); surviving tokens are kept
   byte-identical.  */
19756 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19758 int regno = true_regnum (reg);
19759 enum reg_class rclass;
19761 rtx scratch_or_premodify = scratch;
/* Optional tracing of secondary-reload decisions (TARGET_DEBUG_ADDR).  */
19763 if (TARGET_DEBUG_ADDR)
19765 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19766 store_p ? "store" : "load");
19767 fprintf (stderr, "reg:\n");
19769 fprintf (stderr, "mem:\n");
19771 fprintf (stderr, "scratch:\n");
19772 debug_rtx (scratch);
/* Sanity checks: the reloaded operand must be a hard GPR/base register
   and the other operand must be a MEM.  */
19775 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
19776 gcc_assert (MEM_P (mem));
19777 rclass = REGNO_REG_CLASS (regno);
19778 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19779 addr = XEXP (mem, 0);
/* For PRE_MODIFY, reuse the update base register as the scratch and
   strip the address down to the inner PLUS.  */
19781 if (GET_CODE (addr) == PRE_MODIFY)
19783 gcc_assert (REG_P (XEXP (addr, 0))
19784 && GET_CODE (XEXP (addr, 1)) == PLUS
19785 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19786 scratch_or_premodify = XEXP (addr, 0);
19787 addr = XEXP (addr, 1);
19789 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
/* Force the problematic address into the scratch register and rewrite
   the MEM to address through it.  */
19791 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19793 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19795 /* Now create the move. */
19797 emit_insn (gen_rtx_SET (mem, reg));
19799 emit_insn (gen_rtx_SET (reg, mem));
19804 /* Given an rtx X being reloaded into a reg required to be
19805 in class CLASS, return the class of reg to actually use.
19806 In general this is just CLASS; but on some machines
19807 in some cases it is preferable to use a more restrictive class.
19809 On the RS/6000, we have to return NO_REGS when we want to reload a
19810 floating-point CONST_DOUBLE to force it to be copied to memory.
19812 We also don't want to reload integer values into floating-point
19813 registers if we can at all help it. In fact, this can
19814 cause reload to die, if it tries to generate a reload of CTR
19815 into a FP register and discovers it doesn't have the memory location
19818 ??? Would it be a good idea to have reload do the converse, that is
19819 try to reload floating modes into FP registers if possible?
/* Implements TARGET_PREFERRED_RELOAD_CLASS.  NOTE(review): the
   extraction of this function is missing braces and several return
   statements (the internal numbering skips lines); the visible tokens
   are preserved byte-identical below.  */
19822 static enum reg_class
19823 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19825 machine_mode mode = GET_MODE (x);
19826 bool is_constant = CONSTANT_P (x);
19828 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19829 reload class for it. */
19830 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19831 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19834 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19835 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19838 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19839 the reloading of address expressions using PLUS into floating point
19841 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19845 /* Zero is always allowed in all VSX registers. */
19846 if (x == CONST0_RTX (mode))
19849 /* If this is a vector constant that can be formed with a few Altivec
19850 instructions, we want altivec registers. */
19851 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19852 return ALTIVEC_REGS;
19854 /* If this is an integer constant that can easily be loaded into
19855 vector registers, allow it. */
19856 if (CONST_INT_P (x))
19858 HOST_WIDE_INT value = INTVAL (x);
19860 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19861 2.06 can generate it in the Altivec registers with
19865 if (TARGET_P8_VECTOR)
19867 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19868 return ALTIVEC_REGS;
19873 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19874 a sign extend in the Altivec registers. */
19875 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19876 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19877 return ALTIVEC_REGS;
19880 /* Force constant to memory. */
19884 /* D-form addressing can easily reload the value. */
19885 if (mode_supports_vmx_dform (mode)
19886 || mode_supports_dq_form (mode))
19889 /* If this is a scalar floating point value and we don't have D-form
19890 addressing, prefer the traditional floating point registers so that we
19891 can use D-form (register+offset) addressing. */
19892 if (rclass == VSX_REGS
19893 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19896 /* Prefer the Altivec registers if Altivec is handling the vector
19897 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19899 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19900 || mode == V1TImode)
19901 return ALTIVEC_REGS;
/* Constants and PLUS address expressions prefer integer register
   classes when the requested class can contain them.  */
19906 if (is_constant || GET_CODE (x) == PLUS)
19908 if (reg_class_subset_p (GENERAL_REGS, rclass))
19909 return GENERAL_REGS;
19910 if (reg_class_subset_p (BASE_REGS, rclass))
/* Narrow the GPR-or-FPR union class to GENERAL_REGS for integer modes.  */
19915 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
19916 return GENERAL_REGS;
19921 /* Debug version of rs6000_preferred_reload_class. */
/* Wrapper used when -mdebug is on: delegates to the real hook and
   traces the result to stderr.  NOTE(review): the fprintf header line
   and trailing return are missing from this extraction.  */
19922 static enum reg_class
19923 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19925 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19928 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19930 reg_class_names[ret], reg_class_names[rclass],
19931 GET_MODE_NAME (GET_MODE (x)));
19937 /* If we are copying between FP or AltiVec registers and anything else, we need
19938 a memory location. The exception is when we are targeting ppc64 and the
19939 move to/from fpr to gpr instructions are available. Also, under VSX, you
19940 can copy vector registers from the FP register set to the Altivec register
19941 set and vice versa. */
/* Implements TARGET_SECONDARY_MEMORY_NEEDED.  Returns true when a copy
   between FROM_CLASS and TO_CLASS in MODE must bounce through memory.
   NOTE(review): the `static bool` line and return statements are
   missing from this extraction.  */
19944 rs6000_secondary_memory_needed (machine_mode mode,
19945 reg_class_t from_class,
19946 reg_class_t to_class)
19948 enum rs6000_reg_type from_type, to_type;
19949 bool altivec_p = ((from_class == ALTIVEC_REGS)
19950 || (to_class == ALTIVEC_REGS));
19952 /* If a simple/direct move is available, we don't need secondary memory */
19953 from_type = reg_class_to_reg_type[(int)from_class];
19954 to_type = reg_class_to_reg_type[(int)to_class];
/* Probe for a direct register-to-register move; a null
   secondary_reload_info means "just test, don't emit".  */
19956 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19957 (secondary_reload_info *)0, altivec_p))
19960 /* If we have a floating point or vector register class, we need to use
19961 memory to transfer the data. */
19962 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19968 /* Debug version of rs6000_secondary_memory_needed. */
/* Traces the decision of the real hook to stderr.  NOTE(review): the
   return-type line, the leading fprintf(stderr, ...) line and the final
   return are missing from this extraction.  */
19970 rs6000_debug_secondary_memory_needed (machine_mode mode,
19971 reg_class_t from_class,
19972 reg_class_t to_class)
19974 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19977 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19978 "to_class = %s, mode = %s\n",
19979 ret ? "true" : "false",
19980 reg_class_names[from_class],
19981 reg_class_names[to_class],
19982 GET_MODE_NAME (mode));
19987 /* Return the register class of a scratch register needed to copy IN into
19988 or out of a register in RCLASS in MODE. If it can be done directly,
19989 NO_REGS is returned. */
/* Implements TARGET_SECONDARY_RELOAD_CLASS.  REGNO ends up as the hard
   register number of IN, or -1 for constants/memory/pseudos.
   NOTE(review): several lines (the `rtx in` parameter line, braces,
   some return statements and one `if (TARGET_VSX ...` condition head)
   are missing from this extraction.  */
19991 static enum reg_class
19992 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
/* ELF and Darwin-PIC symbolic operands need a BASE_REGS intermediate.  */
19997 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19999 && MACHOPIC_INDIRECT
20003 /* We cannot copy a symbolic operand directly into anything
20004 other than BASE_REGS for TARGET_ELF. So indicate that a
20005 register from BASE_REGS is needed as an intermediate
20008 On Darwin, pic addresses require a load from memory, which
20009 needs a base register. */
20010 if (rclass != BASE_REGS
20011 && (SYMBOL_REF_P (in)
20012 || GET_CODE (in) == HIGH
20013 || GET_CODE (in) == LABEL_REF
20014 || GET_CODE (in) == CONST))
/* Resolve IN to a hard register number if possible (handles both
   plain REGs and SUBREGs of hard registers).  */
20020 regno = REGNO (in);
20021 if (!HARD_REGISTER_NUM_P (regno))
20023 regno = true_regnum (in);
20024 if (!HARD_REGISTER_NUM_P (regno))
20028 else if (SUBREG_P (in))
20030 regno = true_regnum (in);
20031 if (!HARD_REGISTER_NUM_P (regno))
20037 /* If we have VSX register moves, prefer moving scalar values between
20038 Altivec registers and GPR by going via an FPR (and then via memory)
20039 instead of reloading the secondary memory address for Altivec moves. */
20041 && GET_MODE_SIZE (mode) < 16
20042 && !mode_supports_vmx_dform (mode)
20043 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20044 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20045 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20046 && (regno >= 0 && INT_REGNO_P (regno)))))
20049 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20051 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20052 || (regno >= 0 && INT_REGNO_P (regno)))
20055 /* Constants, memory, and VSX registers can go into VSX registers (both the
20056 traditional floating point and the altivec registers). */
20057 if (rclass == VSX_REGS
20058 && (regno == -1 || VSX_REGNO_P (regno)))
20061 /* Constants, memory, and FP registers can go into FP registers. */
/* SDmode is the exception: before LRA it needs a GPR intermediate.  */
20062 if ((regno == -1 || FP_REGNO_P (regno))
20063 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
20064 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20066 /* Memory, and AltiVec registers can go into AltiVec registers. */
20067 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20068 && rclass == ALTIVEC_REGS)
20071 /* We can copy among the CR registers. */
20072 if ((rclass == CR_REGS || rclass == CR0_REGS)
20073 && regno >= 0 && CR_REGNO_P (regno))
20076 /* Otherwise, we need GENERAL_REGS. */
20077 return GENERAL_REGS;
20080 /* Debug version of rs6000_secondary_reload_class. */
/* Traces the scratch-class decision to stderr, then returns it.
   NOTE(review): the leading fprintf(stderr, ...) line, debug_rtx (in)
   and the final return are missing from this extraction.  */
20081 static enum reg_class
20082 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20083 machine_mode mode, rtx in)
20085 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20087 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20088 "mode = %s, input rtx:\n",
20089 reg_class_names[ret], reg_class_names[rclass],
20090 GET_MODE_NAME (mode));
20096 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
/* Decide whether a subreg mode-change FROM -> TO is valid for registers
   in RCLASS.  NOTE(review): the `static bool` line, the `machine_mode to`
   parameter line, braces and several return statements are missing from
   this extraction; visible tokens are preserved byte-identical.  */
20099 rs6000_can_change_mode_class (machine_mode from,
20101 reg_class_t rclass)
20103 unsigned from_size = GET_MODE_SIZE (from);
20104 unsigned to_size = GET_MODE_SIZE (to);
/* Size-changing subregs are the dangerous case for FP/VSX registers.  */
20106 if (from_size != to_size)
20108 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20110 if (reg_classes_intersect_p (xclass, rclass))
20112 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20113 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20114 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20115 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20117 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20118 single register under VSX because the scalar part of the register
20119 is in the upper 64-bits, and not the lower 64-bits. Types like
20120 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20121 IEEE floating point can't overlap, and neither can small
20124 if (to_float128_vector_p && from_float128_vector_p)
20127 else if (to_float128_vector_p || from_float128_vector_p)
20130 /* TDmode in floating-mode registers must always go into a register
20131 pair with the most significant word in the even-numbered register
20132 to match ISA requirements. In little-endian mode, this does not
20133 match subreg numbering, so we cannot allow subregs. */
20134 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20137 if (from_size < 8 || to_size < 8)
20140 if (from_size == 8 && (8 * to_nregs) != to_size)
20143 if (to_size == 8 && (8 * from_nregs) != from_size)
20152 /* Since the VSX register set includes traditional floating point registers
20153 and altivec registers, just check for the size being different instead of
20154 trying to check whether the modes are vector modes. Otherwise it won't
20155 allow say DF and DI to change classes. For types like TFmode and TDmode
20156 that take 2 64-bit registers, rather than a single 128-bit register, don't
20157 allow subregs of those types to other 128 bit types. */
20158 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20160 unsigned num_regs = (from_size + 15) / 16;
20161 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20162 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20165 return (from_size == 8 || from_size == 16);
/* For Altivec-only classes, forbid changes where exactly one of the two
   modes is an Altivec vector mode (sum of the two 0/1 predicates == 1).  */
20168 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20169 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20175 /* Debug version of rs6000_can_change_mode_class. */
/* Traces the mode-class-change decision to stderr.  NOTE(review): the
   return-type line, `machine_mode to` parameter line, the leading
   fprintf(stderr, ...) line and the final return are missing from this
   extraction.  */
20177 rs6000_debug_can_change_mode_class (machine_mode from,
20179 reg_class_t rclass)
20181 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20184 "rs6000_can_change_mode_class, return %s, from = %s, "
20185 "to = %s, rclass = %s\n",
20186 ret ? "true" : "false",
20187 GET_MODE_NAME (from), GET_MODE_NAME (to),
20188 reg_class_names[rclass]);
20193 /* Return a string to do a move operation of 128 bits of data. */
/* Selects the assembler template for a 128-bit move based on where the
   destination and source live (GPR, FPR, Altivec/VSX) and which ISA
   features are enabled.  Returned strings use GCC operand-modifier
   syntax (%x0 = VSX register number, %L0 = second word, %y1 = indexed
   address).  NOTE(review): braces, some `else` arms and several return
   statements are missing from this extraction.  */
20196 rs6000_output_move_128bit (rtx operands[])
20198 rtx dest = operands[0];
20199 rtx src = operands[1];
20200 machine_mode mode = GET_MODE (dest);
20203 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20204 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
/* Classify the destination register bank (VSX = FPR or Altivec).  */
20208 dest_regno = REGNO (dest);
20209 dest_gpr_p = INT_REGNO_P (dest_regno);
20210 dest_fp_p = FP_REGNO_P (dest_regno);
20211 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20212 dest_vsx_p = dest_fp_p | dest_vmx_p;
20217 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
/* Classify the source register bank the same way.  */
20222 src_regno = REGNO (src);
20223 src_gpr_p = INT_REGNO_P (src_regno);
20224 src_fp_p = FP_REGNO_P (src_regno);
20225 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20226 src_vsx_p = src_fp_p | src_vmx_p;
20231 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20234 /* Register moves. */
20235 if (dest_regno >= 0 && src_regno >= 0)
/* VSX -> GPR direct move (ISA 3.0), word order depends on endianness.  */
20242 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20243 return (WORDS_BIG_ENDIAN
20244 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20245 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1")
20247 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20251 else if (TARGET_VSX && dest_vsx_p)
20254 return "xxlor %x0,%x1,%x1";
/* GPR -> VSX direct move (ISA 3.0).  */
20256 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20257 return (WORDS_BIG_ENDIAN
20258 ? "mtvsrdd %x0,%1,%L1"
20259 : "mtvsrdd %x0,%L1,%1");
20261 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20265 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20266 return "vor %0,%1,%1";
20268 else if (dest_fp_p && src_fp_p)
/* Loads (register destination, memory source).  */
20273 else if (dest_regno >= 0 && MEM_P (src))
20277 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20283 else if (TARGET_ALTIVEC && dest_vmx_p
20284 && altivec_indexed_or_indirect_operand (src, mode))
20285 return "lvx %0,%y1";
20287 else if (TARGET_VSX && dest_vsx_p)
20289 if (mode_supports_dq_form (mode)
20290 && quad_address_p (XEXP (src, 0), mode, true))
20291 return "lxv %x0,%1";
20293 else if (TARGET_P9_VECTOR)
20294 return "lxvx %x0,%y1";
20296 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20297 return "lxvw4x %x0,%y1";
20300 return "lxvd2x %x0,%y1";
20303 else if (TARGET_ALTIVEC && dest_vmx_p)
20304 return "lvx %0,%y1";
20306 else if (dest_fp_p)
/* Stores (memory destination, register source).  */
20311 else if (src_regno >= 0 && MEM_P (dest))
20315 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20316 return "stq %1,%0";
20321 else if (TARGET_ALTIVEC && src_vmx_p
20322 && altivec_indexed_or_indirect_operand (dest, mode))
20323 return "stvx %1,%y0";
20325 else if (TARGET_VSX && src_vsx_p)
20327 if (mode_supports_dq_form (mode)
20328 && quad_address_p (XEXP (dest, 0), mode, true))
20329 return "stxv %x1,%0";
20331 else if (TARGET_P9_VECTOR)
20332 return "stxvx %x1,%y0";
20334 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20335 return "stxvw4x %x1,%y0";
20338 return "stxvd2x %x1,%y0";
20341 else if (TARGET_ALTIVEC && src_vmx_p)
20342 return "stvx %1,%y0";
/* Constant sources.  */
20349 else if (dest_regno >= 0
20350 && (CONST_INT_P (src)
20351 || CONST_WIDE_INT_P (src)
20352 || CONST_DOUBLE_P (src)
20353 || GET_CODE (src) == CONST_VECTOR))
20358 else if ((dest_vmx_p && TARGET_ALTIVEC)
20359 || (dest_vsx_p && TARGET_VSX))
20360 return output_vec_const_move (operands);
/* Anything that falls through is an invalid combination.  */
20363 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20366 /* Validate a 128-bit move. */
/* A 128-bit move is ok if at least one operand is a register of an
   appropriate class for the mode (the other may be memory/constant).  */
20368 rs6000_move_128bit_ok_p (rtx operands[])
20370 machine_mode mode = GET_MODE (operands[0]);
20371 return (gpc_reg_operand (operands[0], mode)
20372 || gpc_reg_operand (operands[1], mode));
20375 /* Return true if a 128-bit move needs to be split. */
/* Splitting only happens after reload, only for GPR-involving moves, and
   never for moves that the quad-memory (lq/stq) path can handle.
   NOTE(review): the early `return false;` bodies of these guards are
   missing from this extraction.  */
20377 rs6000_split_128bit_ok_p (rtx operands[])
20379 if (!reload_completed)
20382 if (!gpr_or_gpr_p (operands[0], operands[1]))
20385 if (quad_load_store_p (operands[0], operands[1]))
20392 /* Given a comparison operation, return the bit number in CCR to test. We
20393 know this is a valid comparison.
20395 SCC_P is 1 if this is for an scc. That means that %D will have been
20396 used instead of %C, so the bits will be in different places.
20398 Return -1 if OP isn't a valid comparison for some reason. */
/* Each CR field is 4 bits wide: LT = base+0, GT = base+1, EQ = base+2,
   SO/UN = base+3.  NOTE(review): the switch head, several case labels
   and some return lines are missing from this extraction; the visible
   tokens are preserved byte-identical.  */
20401 ccr_bit (rtx op, int scc_p)
20403 enum rtx_code code = GET_CODE (op);
20404 machine_mode cc_mode;
20409 if (!COMPARISON_P (op))
20412 reg = XEXP (op, 0);
/* The first operand must be a condition-register hard reg.  */
20414 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
20417 cc_mode = GET_MODE (reg);
20418 cc_regnum = REGNO (reg);
/* Bit offset of this CR field within the 32-bit CCR.  */
20419 base_bit = 4 * (cc_regnum - CR0_REGNO);
20421 validate_condition_mode (code, cc_mode);
20423 /* When generating a sCOND operation, only positive conditions are
20442 return scc_p ? base_bit + 3 : base_bit + 2;
20444 return base_bit + 2;
20445 case GT: case GTU: case UNLE:
20446 return base_bit + 1;
20447 case LT: case LTU: case UNGE:
20449 case ORDERED: case UNORDERED:
20450 return base_bit + 3;
20453 /* If scc, we will have done a cror to put the bit in the
20454 unordered position. So test that bit. For integer, this is ! LT
20455 unless this is an scc insn. */
20456 return scc_p ? base_bit + 3 : base_bit;
20459 return scc_p ? base_bit + 3 : base_bit + 1;
20466 /* Return the GOT register. */
/* Returns the PIC offset table register, marking it live and flagging
   that this function uses the PIC offset table.  VALUE is unused.  */
20469 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20471 /* The second flow pass currently (June 1999) can't update
20472 regs_ever_live without disturbing other parts of the compiler, so
20473 update it here to make the prolog/epilogue code happy. */
20474 if (!can_create_pseudo_p ()
20475 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20476 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
/* Remember that this function needs the GOT/TOC pointer set up.  */
20478 crtl->uses_pic_offset_table = 1;
20480 return pic_offset_table_rtx;
/* Cached per-function stack layout; reset whenever a new function's
   machine_function is initialized below.  */
20483 static rs6000_stack_t stack_info;
20485 /* Function to init struct machine_function.
20486 This will be called, via a pointer variable,
20487 from push_function_context. */
/* Invalidate the cached stack layout and return a zeroed, GC-allocated
   machine_function for the new function.  */
20489 static struct machine_function *
20490 rs6000_init_machine_status (void)
20492 stack_info.reload_completed = 0;
20493 return ggc_cleared_alloc<machine_function> ();
/* True for a CONST_INT with no machine mode (i.e. a plain integer).  */
20496 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20498 /* Write out a function code label. */
/* Emits the entry-point label for FNAME into FILE; names not already
   starting with '.' get an ABI-dependent prefix.  NOTE(review): the
   case labels of the DEFAULT_ABI switch are missing from this
   extraction.  */
20501 rs6000_output_function_entry (FILE *file, const char *fname)
20503 if (fname[0] != '.')
20505 switch (DEFAULT_ABI)
20508 gcc_unreachable ();
20514 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20524 RS6000_OUTPUT_BASENAME (file, fname);
20527 /* Print an operand. Recognize special options, documented below. */
20530 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20531 only introduced by the linker, when applying the sda21
20533 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20534 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20536 #define SMALL_DATA_RELOC "sda21"
20537 #define SMALL_DATA_REG 0
/* Implements TARGET_PRINT_OPERAND: prints operand X to FILE, interpreted
   through output-modifier letter CODE (e.g. "%D0", "%x1" in insn
   templates).  NOTE(review): this extraction has dropped most of the
   `case '<letter>':` labels, `break;` statements and braces of the big
   switch; only fragments of each handler survive and are preserved
   byte-identical below.  The inline /* ... *​/ comments that do survive
   identify which modifier each fragment belongs to.  */
20541 print_operand (FILE *file, rtx x, int code)
20544 unsigned HOST_WIDE_INT uval;
20548 /* %a is output_address. */
20550 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20554 /* Like 'J' but get to the GT bit only. */
20555 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20557 output_operand_lossage ("invalid %%D value");
20561 /* Bit 1 is GT bit. */
20562 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20564 /* Add one for shift count in rlinm for scc. */
20565 fprintf (file, "%d", i + 1);
20569 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20572 output_operand_lossage ("invalid %%e value");
20577 if ((uval & 0xffff) == 0 && uval != 0)
20582 /* X is a CR register. Print the number of the EQ bit of the CR */
20583 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20584 output_operand_lossage ("invalid %%E value");
20586 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20590 /* X is a CR register. Print the shift count needed to move it
20591 to the high-order four bits. */
20592 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20593 output_operand_lossage ("invalid %%f value");
20595 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20599 /* Similar, but print the count for the rotate in the opposite
20601 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20602 output_operand_lossage ("invalid %%F value");
20604 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20608 /* X is a constant integer. If it is negative, print "m",
20609 otherwise print "z". This is to make an aze or ame insn. */
20610 if (!CONST_INT_P (x))
20611 output_operand_lossage ("invalid %%G value");
20612 else if (INTVAL (x) >= 0)
20619 /* If constant, output low-order five bits. Otherwise, write
20622 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20624 print_operand (file, x, 0);
20628 /* If constant, output low-order six bits. Otherwise, write
20631 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20633 print_operand (file, x, 0);
20637 /* Print `i' if this is a constant, else nothing. */
20643 /* Write the bit number in CCR for jump. */
20644 i = ccr_bit (x, 0);
20646 output_operand_lossage ("invalid %%j code");
20648 fprintf (file, "%d", i);
20652 /* Similar, but add one for shift count in rlinm for scc and pass
20653 scc flag to `ccr_bit'. */
20654 i = ccr_bit (x, 1);
20656 output_operand_lossage ("invalid %%J code");
20658 /* If we want bit 31, write a shift count of zero, not 32. */
20659 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20663 /* X must be a constant. Write the 1's complement of the
20666 output_operand_lossage ("invalid %%k value");
20668 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20672 /* X must be a symbolic constant on ELF. Write an
20673 expression suitable for an 'addi' that adds in the low 16
20674 bits of the MEM. */
20675 if (GET_CODE (x) == CONST)
20677 if (GET_CODE (XEXP (x, 0)) != PLUS
20678 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20679 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20680 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20681 output_operand_lossage ("invalid %%K value");
20683 print_operand_address (file, x);
20684 fputs ("@l", file);
20687 /* %l is output_asm_label. */
20690 /* Write second word of DImode or DFmode reference. Works on register
20691 or non-indexed memory only. */
20693 fputs (reg_names[REGNO (x) + 1], file);
20694 else if (MEM_P (x))
20696 machine_mode mode = GET_MODE (x);
20697 /* Handle possible auto-increment. Since it is pre-increment and
20698 we have already done it, we can just use an offset of word. */
20699 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20700 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20701 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20703 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20704 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20707 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20711 if (small_data_operand (x, GET_MODE (x)))
20712 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20713 reg_names[SMALL_DATA_REG]);
20717 case 'N': /* Unused */
20718 /* Write the number of elements in the vector times 4. */
20719 if (GET_CODE (x) != PARALLEL)
20720 output_operand_lossage ("invalid %%N value");
20722 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20725 case 'O': /* Unused */
20726 /* Similar, but subtract 1 first. */
20727 if (GET_CODE (x) != PARALLEL)
20728 output_operand_lossage ("invalid %%O value");
20730 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20734 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20737 || (i = exact_log2 (INTVAL (x))) < 0)
20738 output_operand_lossage ("invalid %%p value");
20740 fprintf (file, "%d", i);
20744 /* The operand must be an indirect memory reference. The result
20745 is the register name. */
20746 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20747 || REGNO (XEXP (x, 0)) >= 32)
20748 output_operand_lossage ("invalid %%P value");
20750 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20754 /* This outputs the logical code corresponding to a boolean
20755 expression. The expression may have one or both operands
20756 negated (if one, only the first one). For condition register
20757 logical operations, it will also treat the negated
20758 CR codes as NOTs, but not handle NOTs of them. */
20760 const char *const *t = 0;
20762 enum rtx_code code = GET_CODE (x);
20763 static const char * const tbl[3][3] = {
20764 { "and", "andc", "nor" },
20765 { "or", "orc", "nand" },
20766 { "xor", "eqv", "xor" } };
20770 else if (code == IOR)
20772 else if (code == XOR)
20775 output_operand_lossage ("invalid %%q value");
20777 if (GET_CODE (XEXP (x, 0)) != NOT)
20781 if (GET_CODE (XEXP (x, 1)) == NOT)
20792 if (! TARGET_MFCRF)
20798 /* X is a CR register. Print the mask for `mtcrf'. */
20799 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20800 output_operand_lossage ("invalid %%R value");
20802 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20806 /* Low 5 bits of 32 - value */
20808 output_operand_lossage ("invalid %%s value");
20810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20814 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20815 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20817 output_operand_lossage ("invalid %%t value");
20821 /* Bit 3 is OV bit. */
20822 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20824 /* If we want bit 31, write a shift count of zero, not 32. */
20825 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20829 /* Print the symbolic name of a branch target register. */
20830 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20831 x = XVECEXP (x, 0, 0);
20832 if (!REG_P (x) || (REGNO (x) != LR_REGNO
20833 && REGNO (x) != CTR_REGNO))
20834 output_operand_lossage ("invalid %%T value");
20835 else if (REGNO (x) == LR_REGNO)
20836 fputs ("lr", file);
20838 fputs ("ctr", file);
20842 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20843 for use in unsigned operand. */
20846 output_operand_lossage ("invalid %%u value");
20851 if ((uval & 0xffff) == 0)
20854 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20858 /* High-order 16 bits of constant for use in signed operand. */
20860 output_operand_lossage ("invalid %%v value");
20862 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20863 (INTVAL (x) >> 16) & 0xffff);
20867 /* Print `u' if this has an auto-increment or auto-decrement. */
20869 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20870 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20871 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20876 /* Print the trap code for this operand. */
20877 switch (GET_CODE (x))
20880 fputs ("eq", file); /* 4 */
20883 fputs ("ne", file); /* 24 */
20886 fputs ("lt", file); /* 16 */
20889 fputs ("le", file); /* 20 */
20892 fputs ("gt", file); /* 8 */
20895 fputs ("ge", file); /* 12 */
20898 fputs ("llt", file); /* 2 */
20901 fputs ("lle", file); /* 6 */
20904 fputs ("lgt", file); /* 1 */
20907 fputs ("lge", file); /* 5 */
20910 output_operand_lossage ("invalid %%V value");
20915 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20918 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20919 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20921 print_operand (file, x, 0);
20925 /* X is a FPR or Altivec register used in a VSX context. */
20926 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
20927 output_operand_lossage ("invalid %%x value");
20930 int reg = REGNO (x);
20931 int vsx_reg = (FP_REGNO_P (reg)
20933 : reg - FIRST_ALTIVEC_REGNO + 32);
20935 #ifdef TARGET_REGNAMES
20936 if (TARGET_REGNAMES)
20937 fprintf (file, "%%vs%d", vsx_reg);
20940 fprintf (file, "%d", vsx_reg);
20946 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20947 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20948 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20953 /* Like 'L', for third word of TImode/PTImode */
20955 fputs (reg_names[REGNO (x) + 2], file);
20956 else if (MEM_P (x))
20958 machine_mode mode = GET_MODE (x);
20959 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20960 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20961 output_address (mode, plus_constant (Pmode,
20962 XEXP (XEXP (x, 0), 0), 8));
20963 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20964 output_address (mode, plus_constant (Pmode,
20965 XEXP (XEXP (x, 0), 0), 8));
20967 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20968 if (small_data_operand (x, GET_MODE (x)))
20969 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20970 reg_names[SMALL_DATA_REG]);
20975 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20976 x = XVECEXP (x, 0, 1);
20977 /* X is a SYMBOL_REF. Write out the name preceded by a
20978 period and without any trailing data in brackets. Used for function
20979 names. If we are configured for System V (or the embedded ABI) on
20980 the PowerPC, do not emit the period, since those systems do not use
20981 TOCs and the like. */
20982 if (!SYMBOL_REF_P (x))
20984 output_operand_lossage ("invalid %%z value");
20988 /* For macho, check to see if we need a stub. */
20991 const char *name = XSTR (x, 0);
20993 if (darwin_emit_branch_islands
20994 && MACHOPIC_INDIRECT
20995 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20996 name = machopic_indirection_name (x, /*stub_p=*/true);
20998 assemble_name (file, name);
21000 else if (!DOT_SYMBOLS)
21001 assemble_name (file, XSTR (x, 0));
21003 rs6000_output_function_entry (file, XSTR (x, 0));
21007 /* Like 'L', for last word of TImode/PTImode. */
21009 fputs (reg_names[REGNO (x) + 3], file);
21010 else if (MEM_P (x))
21012 machine_mode mode = GET_MODE (x);
21013 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21014 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21015 output_address (mode, plus_constant (Pmode,
21016 XEXP (XEXP (x, 0), 0), 12));
21017 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21018 output_address (mode, plus_constant (Pmode,
21019 XEXP (XEXP (x, 0), 0), 12));
21021 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21022 if (small_data_operand (x, GET_MODE (x)))
21023 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21024 reg_names[SMALL_DATA_REG]);
21028 /* Print AltiVec memory operand. */
21033 gcc_assert (MEM_P (x));
/* Strip the &-16 alignment wrapper or the PRE_MODIFY update so only the
   base/index expression remains.  */
21037 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
21038 && GET_CODE (tmp) == AND
21039 && CONST_INT_P (XEXP (tmp, 1))
21040 && INTVAL (XEXP (tmp, 1)) == -16)
21041 tmp = XEXP (tmp, 0);
21042 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21043 && GET_CODE (tmp) == PRE_MODIFY)
21044 tmp = XEXP (tmp, 1);
21046 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21049 if (GET_CODE (tmp) != PLUS
21050 || !REG_P (XEXP (tmp, 0))
21051 || !REG_P (XEXP (tmp, 1)))
21053 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
/* r0 as the first register of an indexed address means literal 0, so
   swap the operands to keep r0 out of the RA slot.  */
21057 if (REGNO (XEXP (tmp, 0)) == 0)
21058 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21059 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21061 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21062 reg_names[ REGNO (XEXP (tmp, 1)) ]);
/* Default case (no modifier letter): print register, memory address, or
   constant as appropriate.  */
21069 fprintf (file, "%s", reg_names[REGNO (x)]);
21070 else if (MEM_P (x))
21072 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21073 know the width from the mode. */
21074 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21075 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21076 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21077 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21078 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21079 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21080 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21081 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21083 output_address (GET_MODE (x), XEXP (x, 0));
21085 else if (toc_relative_expr_p (x, false,
21086 &tocrel_base_oac, &tocrel_offset_oac))
21087 /* This hack along with a corresponding hack in
21088 rs6000_output_addr_const_extra arranges to output addends
21089 where the assembler expects to find them. eg.
21090 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21091 without this hack would be output as "x@toc+4". We
21093 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21094 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
21095 output_addr_const (file, XVECEXP (x, 0, 0));
21096 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
21097 output_addr_const (file, XVECEXP (x, 0, 1));
21099 output_addr_const (file, x);
/* '%&' — name of the local-dynamic TLS base symbol.  */
21103 if (const char *name = get_some_local_dynamic_name ())
21104 assemble_name (file, name);
21106 output_operand_lossage ("'%%&' used without any "
21107 "local dynamic TLS references");
21111 output_operand_lossage ("invalid %%xn code");
21115 /* Print the address of an operand. */
21118 print_operand_address (FILE *file, rtx x)
21121 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21122 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
21123 || GET_CODE (x) == LABEL_REF)
21125 output_addr_const (file, x);
21126 if (small_data_operand (x, GET_MODE (x)))
21127 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21128 reg_names[SMALL_DATA_REG]);
21130 gcc_assert (!TARGET_TOC);
21132 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21133 && REG_P (XEXP (x, 1)))
21135 if (REGNO (XEXP (x, 0)) == 0)
21136 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21137 reg_names[ REGNO (XEXP (x, 0)) ]);
21139 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21140 reg_names[ REGNO (XEXP (x, 1)) ]);
21142 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21143 && CONST_INT_P (XEXP (x, 1)))
21144 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21145 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21147 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21148 && CONSTANT_P (XEXP (x, 1)))
21150 fprintf (file, "lo16(");
21151 output_addr_const (file, XEXP (x, 1));
21152 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21156 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21157 && CONSTANT_P (XEXP (x, 1)))
21159 output_addr_const (file, XEXP (x, 1));
21160 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21163 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21165 /* This hack along with a corresponding hack in
21166 rs6000_output_addr_const_extra arranges to output addends
21167 where the assembler expects to find them. eg.
21169 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21170 without this hack would be output as "x@toc+8@l(9)". We
21171 want "x+8@toc@l(9)". */
21172 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21173 if (GET_CODE (x) == LO_SUM)
21174 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21176 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21179 output_addr_const (file, x);
21182 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21185 rs6000_output_addr_const_extra (FILE *file, rtx x)
21187 if (GET_CODE (x) == UNSPEC)
21188 switch (XINT (x, 1))
21190 case UNSPEC_TOCREL:
21191 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21192 && REG_P (XVECEXP (x, 0, 1))
21193 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21194 output_addr_const (file, XVECEXP (x, 0, 0));
21195 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21197 if (INTVAL (tocrel_offset_oac) >= 0)
21198 fprintf (file, "+");
21199 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21201 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21204 assemble_name (file, toc_label_name);
21207 else if (TARGET_ELF)
21208 fputs ("@toc", file);
21212 case UNSPEC_MACHOPIC_OFFSET:
21213 output_addr_const (file, XVECEXP (x, 0, 0));
21215 machopic_output_function_base_name (file);
21222 /* Target hook for assembling integer objects. The PowerPC version has
21223 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21224 is defined. It also needs to handle DI-mode objects on 64-bit
21228 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21230 #ifdef RELOCATABLE_NEEDS_FIXUP
21231 /* Special handling for SI values. */
21232 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21234 static int recurse = 0;
21236 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21237 the .fixup section. Since the TOC section is already relocated, we
21238 don't need to mark it here. We used to skip the text section, but it
21239 should never be valid for relocated addresses to be placed in the text
21241 if (DEFAULT_ABI == ABI_V4
21242 && (TARGET_RELOCATABLE || flag_pic > 1)
21243 && in_section != toc_section
21245 && !CONST_SCALAR_INT_P (x)
21251 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21253 ASM_OUTPUT_LABEL (asm_out_file, buf);
21254 fprintf (asm_out_file, "\t.long\t(");
21255 output_addr_const (asm_out_file, x);
21256 fprintf (asm_out_file, ")@fixup\n");
21257 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21258 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21259 fprintf (asm_out_file, "\t.long\t");
21260 assemble_name (asm_out_file, buf);
21261 fprintf (asm_out_file, "\n\t.previous\n");
21265 /* Remove initial .'s to turn a -mcall-aixdesc function
21266 address into the address of the descriptor, not the function
21268 else if (SYMBOL_REF_P (x)
21269 && XSTR (x, 0)[0] == '.'
21270 && DEFAULT_ABI == ABI_AIX)
21272 const char *name = XSTR (x, 0);
21273 while (*name == '.')
21276 fprintf (asm_out_file, "\t.long\t%s\n", name);
21280 #endif /* RELOCATABLE_NEEDS_FIXUP */
21281 return default_assemble_integer (x, size, aligned_p);
21284 /* Return a template string for assembly to emit when making an
21285 external call. FUNOP is the call mem argument operand number. */
21287 static const char *
21288 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21290 /* -Wformat-overflow workaround, without which gcc thinks that %u
21291 might produce 10 digits. */
21292 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21296 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21298 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21299 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21300 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21301 sprintf (arg, "(%%&@tlsld)");
21303 gcc_unreachable ();
21306 /* The magic 32768 offset here corresponds to the offset of
21307 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21309 sprintf (z, "%%z%u%s", funop,
21310 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21313 static char str[32]; /* 2 spare */
21314 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21315 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21316 sibcall ? "" : "\n\tnop");
21317 else if (DEFAULT_ABI == ABI_V4)
21318 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21319 flag_pic ? "@plt" : "");
21321 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
21322 else if (DEFAULT_ABI == ABI_DARWIN)
21324 /* The cookie is in operand func+2. */
21325 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
21326 int cookie = INTVAL (operands[funop + 2]);
21327 if (cookie & CALL_LONG)
21329 tree funname = get_identifier (XSTR (operands[funop], 0));
21330 tree labelname = get_prev_label (funname);
21331 gcc_checking_assert (labelname && !sibcall);
21333 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21334 instruction will reach 'foo', otherwise link as 'bl L42'".
21335 "L42" should be a 'branch island', that will do a far jump to
21336 'foo'. Branch islands are generated in
21337 macho_branch_islands(). */
21338 sprintf (str, "jbsr %%z%u,%.10s", funop,
21339 IDENTIFIER_POINTER (labelname));
21342 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
21344 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21348 gcc_unreachable ();
/* Return the assembly template for a direct external call.
   OPERANDS and FUNOP are as for rs6000_call_template_1, which does
   the work; this is the non-sibcall entry point.  */
21353 rs6000_call_template (rtx *operands, unsigned int funop)
21355 return rs6000_call_template_1 (operands, funop, false);
/* As above, but for a sibling (tail) call.  */
21359 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21361 return rs6000_call_template_1 (operands, funop, true);
21364 /* As above, for indirect calls. */
21366 static const char *
21367 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21370 /* -Wformat-overflow workaround, without which gcc thinks that %u
21371 might produce 10 digits. Note that -Wformat-overflow will not
21372 currently warn here for str[], so do not rely on a warning to
21373 ensure str[] is correctly sized. */
21374 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21376 /* Currently, funop is either 0 or 1. The maximum string is always
21377 a !speculate 64-bit __tls_get_addr call.
21381 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21382 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21384 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21385 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21392 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21393 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21395 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21396 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21403 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21404 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21406 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21407 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21411 static char str[160]; /* 8 spare */
21413 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21415 if (DEFAULT_ABI == ABI_AIX)
21418 ptrload, funop + 2);
21420 /* We don't need the extra code to stop indirect call speculation if
21422 bool speculate = (TARGET_MACHO
21423 || rs6000_speculate_indirect_jumps
21424 || (REG_P (operands[funop])
21425 && REGNO (operands[funop]) == LR_REGNO));
21427 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21429 const char *rel64 = TARGET_64BIT ? "64" : "";
21432 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21434 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21435 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21437 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21438 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21441 gcc_unreachable ();
21444 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21445 && flag_pic == 2 ? "+32768" : "");
21449 "%s.reloc .,R_PPC%s_PLTSEQ,%%z%u%s\n\t",
21450 tls, rel64, funop, addend);
21451 s += sprintf (s, "crset 2\n\t");
21454 "%s.reloc .,R_PPC%s_PLTCALL,%%z%u%s\n\t",
21455 tls, rel64, funop, addend);
21457 else if (!speculate)
21458 s += sprintf (s, "crset 2\n\t");
21460 if (DEFAULT_ABI == ABI_AIX)
21466 funop, ptrload, funop + 3);
21471 funop, ptrload, funop + 3);
21473 else if (DEFAULT_ABI == ABI_ELFv2)
21479 funop, ptrload, funop + 2);
21484 funop, ptrload, funop + 2);
21491 funop, sibcall ? "" : "l");
21495 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
/* Return the assembly template for an indirect call.
   OPERANDS and FUNOP are as for rs6000_indirect_call_template_1,
   which does the work; this is the non-sibcall entry point.  */
21501 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21503 return rs6000_indirect_call_template_1 (operands, funop, false);
/* As above, but for an indirect sibling (tail) call.  */
21507 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21509 return rs6000_indirect_call_template_1 (operands, funop, true);
21513 /* Output indirect call insns.
21514 WHICH is 0 for tocsave, 1 for plt16_ha, 2 for plt16_lo, 3 for mtctr. */
21516 rs6000_pltseq_template (rtx *operands, int which)
21518 const char *rel64 = TARGET_64BIT ? "64" : "";
21521 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21523 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21524 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%3\n\t",
21526 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21527 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21530 gcc_unreachable ();
21533 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21534 static char str[96]; /* 15 spare */
21535 const char *off = WORDS_BIG_ENDIAN ? "+2" : "";
21536 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21537 && flag_pic == 2 ? "+32768" : "");
21542 "%s.reloc .,R_PPC%s_PLTSEQ,%%z2\n\t"
21544 tls, rel64, TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)");
21547 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21549 "%s.reloc .%s,R_PPC%s_PLT16_HA,%%z2\n\t"
21554 "%s.reloc .%s,R_PPC%s_PLT16_HA,%%z2%s\n\t"
21556 tls, off, rel64, addend);
21560 "%s.reloc .%s,R_PPC%s_PLT16_LO%s,%%z2%s\n\t"
21562 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend,
21563 TARGET_64BIT ? "d" : "wz");
21567 "%s.reloc .,R_PPC%s_PLTSEQ,%%z2%s\n\t"
21569 tls, rel64, addend);
21572 gcc_unreachable ();
21578 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21579 /* Emit an assembler directive to set symbol visibility for DECL to
21580 VISIBILITY_TYPE. */
21583 rs6000_assemble_visibility (tree decl, int vis)
21588 /* Functions need to have their entry point symbol visibility set as
21589 well as their descriptor symbol visibility. */
21590 if (DEFAULT_ABI == ABI_AIX
21592 && TREE_CODE (decl) == FUNCTION_DECL)
21594 static const char * const visibility_types[] = {
21595 NULL, "protected", "hidden", "internal"
21598 const char *name, *type;
21600 name = ((* targetm.strip_name_encoding)
21601 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21602 type = visibility_types[vis];
21604 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21605 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21608 default_assemble_visibility (decl, vis);
/* Return the rtx code that inverts comparison CODE made in MODE.
   FP compares use the unordered-aware reversal unless
   -ffinite-math-only is in effect and CODE is not one of the
   explicitly unordered codes.  */
21613 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21615 /* Reversal of FP compares takes care -- an ordered compare
21616 becomes an unordered compare and vice versa. */
21617 if (mode == CCFPmode
21618 && (!flag_finite_math_only
21619 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21620 || code == UNEQ || code == LTGT))
21621 return reverse_condition_maybe_unordered (code);
/* Integer compares (and finite-math FP) reverse normally.  */
21623 return reverse_condition (code);
21626 /* Generate a compare for CODE. Return a brand-new rtx that
21627 represents the result of the compare. */
21630 rs6000_generate_compare (rtx cmp, machine_mode mode)
21632 machine_mode comp_mode;
21633 rtx compare_result;
21634 enum rtx_code code = GET_CODE (cmp);
21635 rtx op0 = XEXP (cmp, 0);
21636 rtx op1 = XEXP (cmp, 1);
21638 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21639 comp_mode = CCmode;
21640 else if (FLOAT_MODE_P (mode))
21641 comp_mode = CCFPmode;
21642 else if (code == GTU || code == LTU
21643 || code == GEU || code == LEU)
21644 comp_mode = CCUNSmode;
21645 else if ((code == EQ || code == NE)
21646 && unsigned_reg_p (op0)
21647 && (unsigned_reg_p (op1)
21648 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21649 /* These are unsigned values, perhaps there will be a later
21650 ordering compare that can be shared with this one. */
21651 comp_mode = CCUNSmode;
21653 comp_mode = CCmode;
21655 /* If we have an unsigned compare, make sure we don't have a signed value as
21657 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21658 && INTVAL (op1) < 0)
21660 op0 = copy_rtx_if_shared (op0);
21661 op1 = force_reg (GET_MODE (op0), op1);
21662 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21665 /* First, the compare. */
21666 compare_result = gen_reg_rtx (comp_mode);
21668 /* IEEE 128-bit support in VSX registers when we do not have hardware
21670 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21672 rtx libfunc = NULL_RTX;
21673 bool check_nan = false;
21680 libfunc = optab_libfunc (eq_optab, mode);
21685 libfunc = optab_libfunc (ge_optab, mode);
21690 libfunc = optab_libfunc (le_optab, mode);
21695 libfunc = optab_libfunc (unord_optab, mode);
21696 code = (code == UNORDERED) ? NE : EQ;
21702 libfunc = optab_libfunc (ge_optab, mode);
21703 code = (code == UNGE) ? GE : GT;
21709 libfunc = optab_libfunc (le_optab, mode);
21710 code = (code == UNLE) ? LE : LT;
21716 libfunc = optab_libfunc (eq_optab, mode);
21717 code = (code = UNEQ) ? EQ : NE;
21721 gcc_unreachable ();
21724 gcc_assert (libfunc);
21727 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21728 SImode, op0, mode, op1, mode);
21730 /* The library signals an exception for signalling NaNs, so we need to
21731 handle isgreater, etc. by first checking isordered. */
21734 rtx ne_rtx, normal_dest, unord_dest;
21735 rtx unord_func = optab_libfunc (unord_optab, mode);
21736 rtx join_label = gen_label_rtx ();
21737 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21738 rtx unord_cmp = gen_reg_rtx (comp_mode);
21741 /* Test for either value being a NaN. */
21742 gcc_assert (unord_func);
21743 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21744 SImode, op0, mode, op1, mode);
21746 /* Set value (0) if either value is a NaN, and jump to the join
21748 dest = gen_reg_rtx (SImode);
21749 emit_move_insn (dest, const1_rtx);
21750 emit_insn (gen_rtx_SET (unord_cmp,
21751 gen_rtx_COMPARE (comp_mode, unord_dest,
21754 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21755 emit_jump_insn (gen_rtx_SET (pc_rtx,
21756 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21760 /* Do the normal comparison, knowing that the values are not
21762 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21763 SImode, op0, mode, op1, mode);
21765 emit_insn (gen_cstoresi4 (dest,
21766 gen_rtx_fmt_ee (code, SImode, normal_dest,
21768 normal_dest, const0_rtx));
21770 /* Join NaN and non-NaN paths. Compare dest against 0. */
21771 emit_label (join_label);
21775 emit_insn (gen_rtx_SET (compare_result,
21776 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21781 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21782 CLOBBERs to match cmptf_internal2 pattern. */
21783 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21784 && FLOAT128_IBM_P (GET_MODE (op0))
21785 && TARGET_HARD_FLOAT)
21786 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21788 gen_rtx_SET (compare_result,
21789 gen_rtx_COMPARE (comp_mode, op0, op1)),
21790 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21791 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21792 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21793 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21794 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21795 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21796 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21797 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21798 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21799 else if (GET_CODE (op1) == UNSPEC
21800 && XINT (op1, 1) == UNSPEC_SP_TEST)
21802 rtx op1b = XVECEXP (op1, 0, 0);
21803 comp_mode = CCEQmode;
21804 compare_result = gen_reg_rtx (CCEQmode);
21806 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21808 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21811 emit_insn (gen_rtx_SET (compare_result,
21812 gen_rtx_COMPARE (comp_mode, op0, op1)));
21815 /* Some kinds of FP comparisons need an OR operation;
21816 under flag_finite_math_only we don't bother. */
21817 if (FLOAT_MODE_P (mode)
21818 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21819 && !flag_finite_math_only
21820 && (code == LE || code == GE
21821 || code == UNEQ || code == LTGT
21822 || code == UNGT || code == UNLT))
21824 enum rtx_code or1, or2;
21825 rtx or1_rtx, or2_rtx, compare2_rtx;
21826 rtx or_result = gen_reg_rtx (CCEQmode);
21830 case LE: or1 = LT; or2 = EQ; break;
21831 case GE: or1 = GT; or2 = EQ; break;
21832 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21833 case LTGT: or1 = LT; or2 = GT; break;
21834 case UNGT: or1 = UNORDERED; or2 = GT; break;
21835 case UNLT: or1 = UNORDERED; or2 = LT; break;
21836 default: gcc_unreachable ();
21838 validate_condition_mode (or1, comp_mode);
21839 validate_condition_mode (or2, comp_mode);
21840 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21841 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21842 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21843 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21845 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21847 compare_result = or_result;
21851 validate_condition_mode (code, GET_MODE (compare_result));
21853 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21857 /* Return the diagnostic message string if the binary operation OP is
21858 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21861 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21865 machine_mode mode1 = TYPE_MODE (type1);
21866 machine_mode mode2 = TYPE_MODE (type2);
21868 /* For complex modes, use the inner type. */
21869 if (COMPLEX_MODE_P (mode1))
21870 mode1 = GET_MODE_INNER (mode1);
21872 if (COMPLEX_MODE_P (mode2))
21873 mode2 = GET_MODE_INNER (mode2);
21875 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21876 double to intermix unless -mfloat128-convert. */
21877 if (mode1 == mode2)
21880 if (!TARGET_FLOAT128_CVT)
21882 if ((mode1 == KFmode && mode2 == IFmode)
21883 || (mode1 == IFmode && mode2 == KFmode))
21884 return N_("__float128 and __ibm128 cannot be used in the same "
21887 if (TARGET_IEEEQUAD
21888 && ((mode1 == IFmode && mode2 == TFmode)
21889 || (mode1 == TFmode && mode2 == IFmode)))
21890 return N_("__ibm128 and long double cannot be used in the same "
21893 if (!TARGET_IEEEQUAD
21894 && ((mode1 == KFmode && mode2 == TFmode)
21895 || (mode1 == TFmode && mode2 == KFmode)))
21896 return N_("__float128 and long double cannot be used in the same "
21904 /* Expand floating point conversion to/from __float128 and __ibm128. */
21907 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21909 machine_mode dest_mode = GET_MODE (dest);
21910 machine_mode src_mode = GET_MODE (src);
21911 convert_optab cvt = unknown_optab;
21912 bool do_move = false;
21913 rtx libfunc = NULL_RTX;
21915 typedef rtx (*rtx_2func_t) (rtx, rtx);
21916 rtx_2func_t hw_convert = (rtx_2func_t)0;
21920 rtx_2func_t from_df;
21921 rtx_2func_t from_sf;
21922 rtx_2func_t from_si_sign;
21923 rtx_2func_t from_si_uns;
21924 rtx_2func_t from_di_sign;
21925 rtx_2func_t from_di_uns;
21928 rtx_2func_t to_si_sign;
21929 rtx_2func_t to_si_uns;
21930 rtx_2func_t to_di_sign;
21931 rtx_2func_t to_di_uns;
21932 } hw_conversions[2] = {
21933 /* conversions to/from KFmode */
21935 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21936 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21937 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21938 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21939 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21940 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21941 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21942 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21943 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21944 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21945 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21946 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21949 /* conversions to/from TFmode */
21951 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21952 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21953 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21954 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21955 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21956 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21957 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21958 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21959 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21960 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21961 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21962 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21966 if (dest_mode == src_mode)
21967 gcc_unreachable ();
21969 /* Eliminate memory operations. */
21971 src = force_reg (src_mode, src);
21975 rtx tmp = gen_reg_rtx (dest_mode);
21976 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21977 rs6000_emit_move (dest, tmp, dest_mode);
21981 /* Convert to IEEE 128-bit floating point. */
21982 if (FLOAT128_IEEE_P (dest_mode))
21984 if (dest_mode == KFmode)
21986 else if (dest_mode == TFmode)
21989 gcc_unreachable ();
21995 hw_convert = hw_conversions[kf_or_tf].from_df;
22000 hw_convert = hw_conversions[kf_or_tf].from_sf;
22006 if (FLOAT128_IBM_P (src_mode))
22015 cvt = ufloat_optab;
22016 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22020 cvt = sfloat_optab;
22021 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22028 cvt = ufloat_optab;
22029 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22033 cvt = sfloat_optab;
22034 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22039 gcc_unreachable ();
22043 /* Convert from IEEE 128-bit floating point. */
22044 else if (FLOAT128_IEEE_P (src_mode))
22046 if (src_mode == KFmode)
22048 else if (src_mode == TFmode)
22051 gcc_unreachable ();
22057 hw_convert = hw_conversions[kf_or_tf].to_df;
22062 hw_convert = hw_conversions[kf_or_tf].to_sf;
22068 if (FLOAT128_IBM_P (dest_mode))
22078 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22083 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22091 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22096 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22101 gcc_unreachable ();
22105 /* Both IBM format. */
22106 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22110 gcc_unreachable ();
22112 /* Handle conversion between TFmode/KFmode/IFmode. */
22114 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22116 /* Handle conversion if we have hardware support. */
22117 else if (TARGET_FLOAT128_HW && hw_convert)
22118 emit_insn ((hw_convert) (dest, src));
22120 /* Call an external function to do the conversion. */
22121 else if (cvt != unknown_optab)
22123 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22124 gcc_assert (libfunc != NULL_RTX);
22126 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22129 gcc_assert (dest2 != NULL_RTX);
22130 if (!rtx_equal_p (dest, dest2))
22131 emit_move_insn (dest, dest2);
22135 gcc_unreachable ();
22141 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22142 can be used as that dest register. Return the dest register. */
/* Emit RTL computing a value that is zero iff OP1 equals OP2, so the
   caller can test it with EQ/NE against zero.  SCRATCH may be used as
   the destination register.  NOTE(review): some lines of this function
   are missing from this dump; comments below are hedged accordingly.  */
22145 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
/* Comparing against zero: presumably OP1 itself is used directly
   (the taken branch is not visible here -- confirm).  */
22147 if (op2 == const0_rtx)
/* Materialize a real register if the pattern supplied a bare
   (scratch).  */
22150 if (GET_CODE (scratch) == SCRATCH)
22151 scratch = gen_reg_rtx (mode);
/* XOR yields zero iff equal and is usable when OP2 is a logical
   operand; otherwise subtract by adding the negation.  */
22153 if (logical_operand (op2, mode))
22154 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22156 emit_insn (gen_rtx_SET (scratch,
22157 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22163 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22166 machine_mode op_mode;
22167 enum rtx_code cond_code;
22168 rtx result = operands[0];
22170 condition_rtx = rs6000_generate_compare (operands[1], mode);
22171 cond_code = GET_CODE (condition_rtx);
22173 if (cond_code == NE
22174 || cond_code == GE || cond_code == LE
22175 || cond_code == GEU || cond_code == LEU
22176 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22178 rtx not_result = gen_reg_rtx (CCEQmode);
22179 rtx not_op, rev_cond_rtx;
22180 machine_mode cc_mode;
22182 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22184 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22185 SImode, XEXP (condition_rtx, 0), const0_rtx);
22186 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22187 emit_insn (gen_rtx_SET (not_result, not_op));
22188 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22191 op_mode = GET_MODE (XEXP (operands[1], 0));
22192 if (op_mode == VOIDmode)
22193 op_mode = GET_MODE (XEXP (operands[1], 1));
22195 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22197 PUT_MODE (condition_rtx, DImode);
22198 convert_move (result, condition_rtx, 0);
22202 PUT_MODE (condition_rtx, SImode);
22203 emit_insn (gen_rtx_SET (result, condition_rtx));
22207 /* Emit a branch of kind CODE to location LOC. */
/* Expand a conditional branch: OPERANDS[0] is the comparison,
   OPERANDS[3] the target label, MODE the mode being compared.  */
22210 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22212 rtx condition_rtx, loc_ref;
/* First emit the CC-setting compare ...  */
22214 condition_rtx = rs6000_generate_compare (operands[0], mode);
22215 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
/* ... then the jump conditional on its result.  */
22216 emit_jump_insn (gen_rtx_SET (pc_rtx,
22217 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22218 loc_ref, pc_rtx)));
22221 /* Return the string to output a conditional branch to LABEL, which is
22222 the operand template of the label, or NULL if the branch is really a
22223 conditional return.
22225 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22226 condition code register and its mode specifies what kind of
22227 comparison we made.
22229 REVERSED is nonzero if we should reverse the sense of the comparison.
22231 INSN is the insn. */
22234 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22236 static char string[64];
22237 enum rtx_code code = GET_CODE (op);
22238 rtx cc_reg = XEXP (op, 0);
22239 machine_mode mode = GET_MODE (cc_reg);
22240 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22241 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22242 int really_reversed = reversed ^ need_longbranch;
22248 validate_condition_mode (code, mode);
22250 /* Work out which way this really branches. We could use
22251 reverse_condition_maybe_unordered here always but this
22252 makes the resulting assembler clearer. */
22253 if (really_reversed)
22255 /* Reversal of FP compares takes care -- an ordered compare
22256 becomes an unordered compare and vice versa. */
22257 if (mode == CCFPmode)
22258 code = reverse_condition_maybe_unordered (code);
22260 code = reverse_condition (code);
22265 /* Not all of these are actually distinct opcodes, but
22266 we distinguish them for clarity of the resulting assembler. */
22267 case NE: case LTGT:
22268 ccode = "ne"; break;
22269 case EQ: case UNEQ:
22270 ccode = "eq"; break;
22272 ccode = "ge"; break;
22273 case GT: case GTU: case UNGT:
22274 ccode = "gt"; break;
22276 ccode = "le"; break;
22277 case LT: case LTU: case UNLT:
22278 ccode = "lt"; break;
22279 case UNORDERED: ccode = "un"; break;
22280 case ORDERED: ccode = "nu"; break;
22281 case UNGE: ccode = "nl"; break;
22282 case UNLE: ccode = "ng"; break;
22284 gcc_unreachable ();
22287 /* Maybe we have a guess as to how likely the branch is. */
22289 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22290 if (note != NULL_RTX)
22292 /* PROB is the difference from 50%. */
22293 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22294 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22296 /* Only hint for highly probable/improbable branches on newer cpus when
22297 we have real profile data, as static prediction overrides processor
22298 dynamic prediction. For older cpus we may as well always hint, but
22299 assume not taken for branches that are very close to 50% as a
22300 mispredicted taken branch is more expensive than a
22301 mispredicted not-taken branch. */
22302 if (rs6000_always_hint
22303 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22304 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22305 && br_prob_note_reliable_p (note)))
22307 if (abs (prob) > REG_BR_PROB_BASE / 20
22308 && ((prob > 0) ^ need_longbranch))
22316 s += sprintf (s, "b%slr%s ", ccode, pred);
22318 s += sprintf (s, "b%s%s ", ccode, pred);
22320 /* We need to escape any '%' characters in the reg_names string.
22321 Assume they'd only be the first character.... */
22322 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22324 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22328 /* If the branch distance was too far, we may have to use an
22329 unconditional branch to go the distance. */
22330 if (need_longbranch)
22331 s += sprintf (s, ",$+8\n\tb %s", label);
22333 s += sprintf (s, ",%s", label);
22339 /* Return insn for VSX or Altivec comparisons. */
/* NOTE(review): this extract elides interior lines (the left-margin
   original line numbers are non-contiguous); confirm any change against
   the full source file.  */
22342 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22345 machine_mode mode = GET_MODE (op0);
/* Integer vector compares are emitted directly: SET a fresh pseudo in the
   vector mode to (CODE op0 op1).  */
22353 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22364 mask = gen_reg_rtx (mode);
22365 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22372 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22373 DMODE is expected destination mode. This is a recursive function. */
/* NOTE(review): interior lines are elided in this extract; the visible
   structure is: try the compare directly, then fall back to (a) swapping
   operands, (b) inverting via one's-complement of the reverse condition,
   or (c) synthesizing GE/LE-style codes as (GT|EQ etc.) with an IOR.  */
22376 rs6000_emit_vector_compare (enum rtx_code rcode,
22378 machine_mode dmode)
22381 bool swap_operands = false;
22382 bool try_again = false;
22384 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22385 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22387 /* See if the comparison works as is. */
22388 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22396 swap_operands = true;
22401 swap_operands = true;
22409 /* Invert condition and try again.
22410 e.g., A != B becomes ~(A==B). */
22412 enum rtx_code rev_code;
22413 enum insn_code nor_code;
/* reverse_condition_maybe_unordered keeps NaN ordering semantics intact;
   UNKNOWN means the reversal is not expressible, so give up this path.  */
22416 rev_code = reverse_condition_maybe_unordered (rcode);
22417 if (rev_code == UNKNOWN)
22420 nor_code = optab_handler (one_cmpl_optab, dmode);
22421 if (nor_code == CODE_FOR_nothing)
22424 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22428 mask = gen_reg_rtx (dmode);
22429 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22437 /* Try GT/GTU/LT/LTU OR EQ */
22440 enum insn_code ior_code;
22441 enum rtx_code new_code;
22462 gcc_unreachable ();
22465 ior_code = optab_handler (ior_optab, dmode);
22466 if (ior_code == CODE_FOR_nothing)
/* Recursive calls: build the strict comparison and the EQ mask, then
   IOR them to form the non-strict result.  */
22469 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22473 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22477 mask = gen_reg_rtx (dmode);
22478 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22489 std::swap (op0, op1);
22491 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22496 /* You only get two chances. */
22500 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22501 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22502 operands for the relation operation COND. */
/* NOTE(review): interior lines are elided in this extract.  The visible
   flow: validate modes, canonicalize the condition (possibly inverting it
   and swapping the move arms), compute the comparison mask, special-case
   constant-vector 0/-1 arms, then emit an IF_THEN_ELSE on mask != 0.  */
22505 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22506 rtx cond, rtx cc_op0, rtx cc_op1)
22508 machine_mode dest_mode = GET_MODE (dest);
22509 machine_mode mask_mode = GET_MODE (cc_op0);
22510 enum rtx_code rcode = GET_CODE (cond);
22511 machine_mode cc_mode = CCmode;
22514 bool invert_move = false;
22516 if (VECTOR_UNIT_NONE_P (dest_mode))
/* Destination and comparison vectors must agree in size and lane count;
   the element modes themselves may differ.  */
22519 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22520 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22524 /* Swap operands if we can, and fall back to doing the operation as
22525 specified, and doing a NOR to invert the test. */
22531 /* Invert condition and try again.
22532 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22533 invert_move = true;
22534 rcode = reverse_condition_maybe_unordered (rcode);
22535 if (rcode == UNKNOWN)
22541 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22543 /* Invert condition to avoid compound test. */
22544 invert_move = true;
22545 rcode = reverse_condition (rcode);
22553 /* Mark unsigned tests with CCUNSmode. */
22554 cc_mode = CCUNSmode;
22556 /* Invert condition to avoid compound test if necessary. */
22557 if (rcode == GEU || rcode == LEU)
22559 invert_move = true;
22560 rcode = reverse_condition (rcode);
22568 /* Get the vector mask for the given relational operations. */
22569 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
/* If the condition was inverted above, swap the arms to compensate.  */
22575 std::swap (op_true, op_false);
22577 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
22578 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22579 && (GET_CODE (op_true) == CONST_VECTOR
22580 || GET_CODE (op_false) == CONST_VECTOR))
22582 rtx constant_0 = CONST0_RTX (dest_mode);
22583 rtx constant_m1 = CONSTM1_RTX (dest_mode);
/* (mask ? -1 : 0) is just the mask itself; (mask ? 0 : -1) is its NOT.  */
22585 if (op_true == constant_m1 && op_false == constant_0)
22587 emit_move_insn (dest, mask);
22591 else if (op_true == constant_0 && op_false == constant_m1)
22593 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22597 /* If we can't use the vector comparison directly, perhaps we can use
22598 the mask for the true or false fields, instead of loading up a
22600 if (op_true == constant_m1)
22603 if (op_false == constant_0)
/* The IF_THEN_ELSE pattern requires register (or subreg) arms.  */
22607 if (!REG_P (op_true) && !SUBREG_P (op_true))
22608 op_true = force_reg (dest_mode, op_true);
22610 if (!REG_P (op_false) && !SUBREG_P (op_false))
22611 op_false = force_reg (dest_mode, op_false);
22613 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22614 CONST0_RTX (dest_mode))
22615 emit_insn (gen_rtx_SET (dest,
22616 gen_rtx_IF_THEN_ELSE (dest_mode,
22623 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22624 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
22625 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22626 hardware has no such operation. */
/* NOTE(review): interior lines are elided in this extract; the early-return
   paths between the visible lines are not shown.  */
22629 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22631 enum rtx_code code = GET_CODE (op);
22632 rtx op0 = XEXP (op, 0);
22633 rtx op1 = XEXP (op, 1);
22634 machine_mode compare_mode = GET_MODE (op0);
22635 machine_mode result_mode = GET_MODE (dest);
22636 bool max_p = false;
/* Only a same-mode compare/result pair can map onto xsmaxcdp/xsmincdp.  */
22638 if (result_mode != compare_mode)
22641 if (code == GE || code == GT)
22643 else if (code == LE || code == LT)
/* Recognize (a OP b ? a : b) as min/max; the swapped arm order flips
   the min/max sense (handled in the elided lines).  */
22648 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22651 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22657 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22661 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22662 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
22663 operands of the last comparison is nonzero/true, FALSE_COND if it is
22664 zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): interior lines are elided in this extract.  */
22667 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22669 enum rtx_code code = GET_CODE (op);
22670 rtx op0 = XEXP (op, 0);
22671 rtx op1 = XEXP (op, 1);
22672 machine_mode result_mode = GET_MODE (dest);
/* The sequence below creates new pseudos, so bail out after reload.  */
22677 if (!can_create_pseudo_p ())
/* Canonicalize by swapping the comparison (elided condition above
   decides when).  */
22690 code = swap_condition (code);
22691 std::swap (op0, op1);
22698 /* Generate: [(parallel [(set (dest)
22699 (if_then_else (op (cmp1) (cmp2))
22702 (clobber (scratch))])]. */
22704 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22705 cmove_rtx = gen_rtx_SET (dest,
22706 gen_rtx_IF_THEN_ELSE (result_mode,
/* The pattern clobbers a V2DImode scratch used for the mask register.  */
22711 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22712 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22713 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22718 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
22719 operands of the last comparison is nonzero/true, FALSE_COND if it
22720 is zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): interior lines are elided in this extract.  Visible flow:
   validate modes; try the power9 min/max and cmove helpers; dispatch
   integer cases to rs6000_emit_int_cmove; otherwise synthesize the FP
   conditional move with fsel by reducing everything to "op0 GE 0".  */
22723 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22725 enum rtx_code code = GET_CODE (op);
22726 rtx op0 = XEXP (op, 0);
22727 rtx op1 = XEXP (op, 1);
22728 machine_mode compare_mode = GET_MODE (op0);
22729 machine_mode result_mode = GET_MODE (dest);
22731 bool is_against_zero;
22733 /* These modes should always match. */
22734 if (GET_MODE (op1) != compare_mode
22735 /* In the isel case however, we can use a compare immediate, so
22736 op1 may be a small constant. */
22737 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22739 if (GET_MODE (true_cond) != result_mode)
22741 if (GET_MODE (false_cond) != result_mode)
22744 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22745 if (TARGET_P9_MINMAX
22746 && (compare_mode == SFmode || compare_mode == DFmode)
22747 && (result_mode == SFmode || result_mode == DFmode))
22749 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22752 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22756 /* Don't allow using floating point comparisons for integer results for
22758 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22761 /* First, work out if the hardware can do this at all, or
22762 if it's too slow.... */
22763 if (!FLOAT_MODE_P (compare_mode))
22766 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22770 is_against_zero = op1 == CONST0_RTX (compare_mode);
22772 /* A floating-point subtract might overflow, underflow, or produce
22773 an inexact result, thus changing the floating-point flags, so it
22774 can't be generated if we care about that. It's safe if one side
22775 of the construct is zero, since then no subtract will be
22777 if (SCALAR_FLOAT_MODE_P (compare_mode)
22778 && flag_trapping_math && ! is_against_zero)
22781 /* Eliminate half of the comparisons by switching operands, this
22782 makes the remaining code simpler. */
22783 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22784 || code == LTGT || code == LT || code == UNLE)
22786 code = reverse_condition_maybe_unordered (code);
22788 true_cond = false_cond;
22792 /* UNEQ and LTGT take four instructions for a comparison with zero,
22793 it'll probably be faster to use a branch here too. */
22794 if (code == UNEQ && HONOR_NANS (compare_mode))
22797 /* We're going to try to implement comparisons by performing
22798 a subtract, then comparing against zero. Unfortunately,
22799 Inf - Inf is NaN which is not zero, and so if we don't
22800 know that the operand is finite and the comparison
22801 would treat EQ different to UNORDERED, we can't do it. */
22802 if (HONOR_INFINITIES (compare_mode)
22803 && code != GT && code != UNGE
22804 && (!CONST_DOUBLE_P (op1)
22805 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22806 /* Constructs of the form (a OP b ? a : b) are safe. */
22807 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22808 || (! rtx_equal_p (op0, true_cond)
22809 && ! rtx_equal_p (op1, true_cond))))
22812 /* At this point we know we can use fsel. */
22814 /* Reduce the comparison to a comparison against zero. */
22815 if (! is_against_zero)
22817 temp = gen_reg_rtx (compare_mode);
22818 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22820 op1 = CONST0_RTX (compare_mode);
22823 /* If we don't care about NaNs we can reduce some of the comparisons
22824 down to faster ones. */
22825 if (! HONOR_NANS (compare_mode))
22831 true_cond = false_cond;
22844 /* Now, reduce everything down to a GE. */
/* Each case below rewrites op0 (negate, abs, or -abs) so the remaining
   test is a plain "op0 GE 0" that fsel can implement.  */
22851 temp = gen_reg_rtx (compare_mode);
22852 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22857 temp = gen_reg_rtx (compare_mode);
22858 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22863 temp = gen_reg_rtx (compare_mode);
22864 emit_insn (gen_rtx_SET (temp,
22865 gen_rtx_NEG (compare_mode,
22866 gen_rtx_ABS (compare_mode, op0))));
22871 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22872 temp = gen_reg_rtx (result_mode);
22873 emit_insn (gen_rtx_SET (temp,
22874 gen_rtx_IF_THEN_ELSE (result_mode,
22875 gen_rtx_GE (VOIDmode,
22877 true_cond, false_cond)));
22878 false_cond = true_cond;
22881 temp = gen_reg_rtx (compare_mode);
22882 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22887 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22888 temp = gen_reg_rtx (result_mode);
22889 emit_insn (gen_rtx_SET (temp,
22890 gen_rtx_IF_THEN_ELSE (result_mode,
22891 gen_rtx_GE (VOIDmode,
22893 true_cond, false_cond)));
22894 true_cond = false_cond;
22897 temp = gen_reg_rtx (compare_mode);
22898 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22903 gcc_unreachable ();
/* Final fsel-style conditional move on the reduced GE-against-zero test.  */
22906 emit_insn (gen_rtx_SET (dest,
22907 gen_rtx_IF_THEN_ELSE (result_mode,
22908 gen_rtx_GE (VOIDmode,
22910 true_cond, false_cond)));
22914 /* Same as above, but for ints (isel). */
/* NOTE(review): interior lines are elided in this extract.  */
22917 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22919 rtx condition_rtx, cr;
22920 machine_mode mode = GET_MODE (dest);
22921 enum rtx_code cond_code;
22922 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
/* isel exists only for SImode, and DImode on 64-bit.  */
22925 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22928 /* We still have to do the compare, because isel doesn't do a
22929 compare, it just looks at the CRx bits set by a previous compare
22931 condition_rtx = rs6000_generate_compare (op, mode);
22932 cond_code = GET_CODE (condition_rtx);
22933 cr = XEXP (condition_rtx, 0);
/* CCmode means a signed compare was generated; CCUNSmode unsigned.  */
22934 signedp = GET_MODE (cr) == CCmode;
22936 isel_func = (mode == SImode
22937 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22938 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22942 case LT: case GT: case LTU: case GTU: case EQ:
22943 /* isel handles these directly. */
22947 /* We need to swap the sense of the comparison. */
22949 std::swap (false_cond, true_cond);
22950 PUT_CODE (condition_rtx, reverse_condition (cond_code));
/* isel operands must be registers (const0_rtx is the one exception).  */
22955 false_cond = force_reg (mode, false_cond);
22956 if (true_cond != const0_rtx)
22957 true_cond = force_reg (mode, true_cond);
22959 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
/* Emit code to compute the min or max (per CODE) of OP0 and OP1 into DEST.
   Uses a direct vector/VSX min-max insn when available, otherwise falls
   back to a conditional move.  NOTE(review): interior lines are elided in
   this extract.  */
22965 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22967 machine_mode mode = GET_MODE (op0);
22971 /* VSX/altivec have direct min/max insns. */
22972 if ((code == SMAX || code == SMIN)
22973 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22974 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22976 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22980 if (code == SMAX || code == SMIN)
/* Fallback: express min/max as a conditional move on op0 <cmp> op1.  */
22985 if (code == SMAX || code == UMAX)
22986 target = emit_conditional_move (dest, c, op0, op1, mode,
22987 op0, op1, mode, 0);
22989 target = emit_conditional_move (dest, c, op0, op1, mode,
22990 op1, op0, mode, 0);
22991 gcc_assert (target);
22992 if (target != dest)
22993 emit_move_insn (dest, target);
22996 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22997 COND is true. Mark the jump as unlikely to be taken. */
23000 emit_unlikely_jump (rtx cond, rtx label)
23002 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23003 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
/* Attach a very-unlikely branch-probability note so later passes lay the
   code out for the fall-through path.  */
23004 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
23007 /* A subroutine of the atomic operation splitters. Emit a load-locked
23008 instruction in MODE. For QI/HImode, possibly use a pattern than includes
23009 the zero_extend operation. */
/* NOTE(review): the switch-on-mode scaffolding is elided in this extract;
   only the per-mode generator assignments are visible.  */
23012 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23014 rtx (*fn) (rtx, rtx) = NULL;
23019 fn = gen_load_lockedqi;
23022 fn = gen_load_lockedhi;
/* SImode destination with a narrower memory operand: use the
   zero-extending lbarx/lharx variants.  */
23025 if (GET_MODE (mem) == QImode)
23026 fn = gen_load_lockedqi_si;
23027 else if (GET_MODE (mem) == HImode)
23028 fn = gen_load_lockedhi_si;
23030 fn = gen_load_lockedsi;
23033 fn = gen_load_lockeddi;
23036 fn = gen_load_lockedti;
23039 gcc_unreachable ();
23041 emit_insn (fn (reg, mem));
23044 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23045 instruction in MODE. */
/* NOTE(review): the switch-on-mode scaffolding is elided in this extract.  */
23048 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23050 rtx (*fn) (rtx, rtx, rtx) = NULL;
23055 fn = gen_store_conditionalqi;
23058 fn = gen_store_conditionalhi;
23061 fn = gen_store_conditionalsi;
23064 fn = gen_store_conditionaldi;
23067 fn = gen_store_conditionalti;
23070 gcc_unreachable ();
23073 /* Emit sync before stwcx. to address PPC405 Erratum. */
23074 if (PPC405_ERRATUM77)
23075 emit_insn (gen_hwsync ());
23077 emit_insn (fn (res, mem, val));
23080 /* Expand barriers before and after a load_locked/store_cond sequence. */
/* Returns MEM, possibly rewritten so its address is valid for the
   load-locked/store-conditional patterns (reg or reg+reg only).  */
23083 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23085 rtx addr = XEXP (mem, 0);
23087 if (!legitimate_indirect_address_p (addr, reload_completed)
23088 && !legitimate_indexed_address_p (addr, reload_completed)
23090 addr = force_reg (Pmode, addr);
23091 mem = replace_equiv_address_nv (mem, addr);
/* Acquire-only orderings need no barrier before the sequence; release
   orderings get lwsync, and seq_cst the full hwsync.  */
23096 case MEMMODEL_RELAXED:
23097 case MEMMODEL_CONSUME:
23098 case MEMMODEL_ACQUIRE:
23100 case MEMMODEL_RELEASE:
23101 case MEMMODEL_ACQ_REL:
23102 emit_insn (gen_lwsync ());
23104 case MEMMODEL_SEQ_CST:
23105 emit_insn (gen_hwsync ());
23108 gcc_unreachable ();
/* Emit the barrier required *after* a load-locked/store-conditional loop
   for memory model MODEL: isync for any ordering with acquire semantics,
   nothing for relaxed/consume/release.  */
23114 rs6000_post_atomic_barrier (enum memmodel model)
23118 case MEMMODEL_RELAXED:
23119 case MEMMODEL_CONSUME:
23120 case MEMMODEL_RELEASE:
23122 case MEMMODEL_ACQUIRE:
23123 case MEMMODEL_ACQ_REL:
23124 case MEMMODEL_SEQ_CST:
23125 emit_insn (gen_isync ());
23128 gcc_unreachable ();
23132 /* A subroutine of the various atomic expanders. For sub-word operations,
23133 we must adjust things to operate on SImode. Given the original MEM,
23134 return a new aligned memory. Also build and return the quantities by
23135 which to shift and mask. */
23138 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23140 rtx addr, align, shift, mask, mem;
23141 HOST_WIDE_INT shift_mask;
23142 machine_mode mode = GET_MODE (orig_mem);
23144 /* For smaller modes, we have to implement this via SImode. */
/* 0x18/0x10 = max bit-shift of a byte/halfword within a 32-bit word.  */
23145 shift_mask = (mode == QImode ? 0x18 : 0x10);
23147 addr = XEXP (orig_mem, 0);
23148 addr = force_reg (GET_MODE (addr), addr);
23150 /* Aligned memory containing subword. Generate a new memory. We
23151 do not want any of the existing MEM_ATTR data, as we're now
23152 accessing memory outside the original object. */
23153 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23154 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23155 mem = gen_rtx_MEM (SImode, align);
23156 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23157 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23158 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23160 /* Shift amount for subword relative to aligned word. */
23161 shift = gen_reg_rtx (SImode);
23162 addr = gen_lowpart (SImode, addr);
23163 rtx tmp = gen_reg_rtx (SImode);
/* shift = (addr * 8) & shift_mask — bit offset of the subword.  */
23164 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23165 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
/* On big-endian the subword sits at the opposite end of the word, so
   complement the shift within shift_mask.  */
23166 if (BYTES_BIG_ENDIAN)
23167 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23168 shift, 1, OPTAB_LIB_WIDEN);
23171 /* Mask for insertion. */
23172 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23173 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23179 /* A subroutine of the various atomic expanders. For sub-word operands,
23180 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
/* Computes (oldval & ~mask) | newval, i.e. splices the new subword bits
   into the surrounding word.  */
23187 x = gen_reg_rtx (SImode);
23188 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23189 gen_rtx_NOT (SImode, mask),
23192 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23197 /* A subroutine of the various atomic expanders. For sub-word operands,
23198 extract WIDE to NARROW via SHIFT. */
23201 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
/* Shift the subword down to bit 0, then take the low part in NARROW's mode.  */
23203 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23204 wide, 1, OPTAB_LIB_WIDEN);
23205 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23208 /* Expand an atomic compare and swap operation. */
/* NOTE(review): interior lines are elided in this extract.  Operand layout
   (from the visible indices): 0 = bool success flag, 1 = output value,
   2 = memory (elided assignment), 3 = expected old value, 4 = new value,
   5 = weak flag, 6 = success memmodel, 7 = failure memmodel.  */
23211 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23213 rtx boolval, retval, mem, oldval, newval, cond;
23214 rtx label1, label2, x, mask, shift;
23215 machine_mode mode, orig_mode;
23216 enum memmodel mod_s, mod_f;
23219 boolval = operands[0];
23220 retval = operands[1];
23222 oldval = operands[3];
23223 newval = operands[4];
23224 is_weak = (INTVAL (operands[5]) != 0);
23225 mod_s = memmodel_base (INTVAL (operands[6]));
23226 mod_f = memmodel_base (INTVAL (operands[7]));
23227 orig_mode = mode = GET_MODE (mem);
23229 mask = shift = NULL_RTX;
23230 if (mode == QImode || mode == HImode)
23232 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23233 lwarx and shift/mask operations. With power8, we need to do the
23234 comparison in SImode, but the store is still done in QI/HImode. */
23235 oldval = convert_modes (SImode, mode, oldval, 1);
23237 if (!TARGET_SYNC_HI_QI)
23239 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23241 /* Shift and mask OLDVAL into position with the word. */
23242 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23243 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23245 /* Shift and mask NEWVAL into position within the word. */
23246 newval = convert_modes (SImode, mode, newval, 1);
23247 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23248 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23251 /* Prepare to adjust the return value. */
23252 retval = gen_reg_rtx (SImode);
/* Avoid clobbering OLDVAL/NEWVAL when RETVAL overlaps them.  */
23255 else if (reg_overlap_mentioned_p (retval, oldval))
23256 oldval = copy_to_reg (oldval);
23258 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23259 oldval = copy_to_mode_reg (mode, oldval);
23261 if (reg_overlap_mentioned_p (retval, newval))
23262 newval = copy_to_reg (newval);
23264 mem = rs6000_pre_atomic_barrier (mem, mod_s);
/* label1 = retry target for the strong-CAS loop; label2 = failure exit.  */
23269 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23270 emit_label (XEXP (label1, 0));
23272 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23274 emit_load_locked (mode, retval, mem);
/* For subword CAS, compare only the masked subword bits.  */
23278 x = expand_simple_binop (SImode, AND, retval, mask,
23279 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23281 cond = gen_reg_rtx (CCmode);
23282 /* If we have TImode, synthesize a comparison. */
23283 if (mode != TImode)
23284 x = gen_rtx_COMPARE (CCmode, x, oldval);
/* TImode has no direct compare: XOR each 64-bit half with the expected
   half, OR the results, and test that against zero.  */
23287 rtx xor1_result = gen_reg_rtx (DImode);
23288 rtx xor2_result = gen_reg_rtx (DImode);
23289 rtx or_result = gen_reg_rtx (DImode);
23290 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23291 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23292 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23293 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23295 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23296 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23297 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23298 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23301 emit_insn (gen_rtx_SET (cond, x));
23303 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23304 emit_unlikely_jump (x, label2);
23308 x = rs6000_mask_atomic_subword (retval, newval, mask);
23310 emit_store_conditional (orig_mode, cond, mem, x);
/* Store-conditional failed: retry the loop (strong CAS path).  */
23314 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23315 emit_unlikely_jump (x, label1);
/* Placement of label2 relative to the post barrier depends on the
   failure memory model.  */
23318 if (!is_mm_relaxed (mod_f))
23319 emit_label (XEXP (label2, 0));
23321 rs6000_post_atomic_barrier (mod_s);
23323 if (is_mm_relaxed (mod_f))
23324 emit_label (XEXP (label2, 0));
23327 rs6000_finish_atomic_subword (operands[1], retval, shift);
23328 else if (mode != GET_MODE (operands[1]))
23329 convert_move (operands[1], retval, 1);
23331 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23332 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23333 emit_insn (gen_rtx_SET (boolval, x));
23336 /* Expand an atomic exchange operation. */
/* NOTE(review): interior lines are elided in this extract.  Operands:
   0 = output (previous value), 1/2 = memory and new value (elided
   assignments), 3 = memmodel.  */
23339 rs6000_expand_atomic_exchange (rtx operands[])
23341 rtx retval, mem, val, cond;
23343 enum memmodel model;
23344 rtx label, x, mask, shift;
23346 retval = operands[0];
23349 model = memmodel_base (INTVAL (operands[3]));
23350 mode = GET_MODE (mem);
23352 mask = shift = NULL_RTX;
/* Without lbarx/lharx, subword exchange is done on the containing
   aligned SImode word with shift/mask bookkeeping.  */
23353 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23355 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23357 /* Shift and mask VAL into position with the word. */
23358 val = convert_modes (SImode, mode, val, 1);
23359 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23360 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23362 /* Prepare to adjust the return value. */
23363 retval = gen_reg_rtx (SImode);
23367 mem = rs6000_pre_atomic_barrier (mem, model);
/* Standard lwarx/stwcx. retry loop.  */
23369 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23370 emit_label (XEXP (label, 0));
23372 emit_load_locked (mode, retval, mem);
23376 x = rs6000_mask_atomic_subword (retval, val, mask);
23378 cond = gen_reg_rtx (CCmode);
23379 emit_store_conditional (mode, cond, mem, x);
23381 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23382 emit_unlikely_jump (x, label);
23384 rs6000_post_atomic_barrier (model);
23387 rs6000_finish_atomic_subword (operands[0], retval, shift);
23390 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23391 to perform. MEM is the memory on which to operate. VAL is the second
23392 operand of the binary operator. BEFORE and AFTER are optional locations to
23393 return the value of MEM either before of after the operation. MODEL_RTX
23394 is a CONST_INT containing the memory model to use. */
/* NOTE(review): interior lines are elided in this extract.  */
23397 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23398 rtx orig_before, rtx orig_after, rtx model_rtx)
23400 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23401 machine_mode mode = GET_MODE (mem);
23402 machine_mode store_mode = mode;
23403 rtx label, x, cond, mask, shift;
23404 rtx before = orig_before, after = orig_after;
23406 mask = shift = NULL_RTX;
23407 /* On power8, we want to use SImode for the operation. On previous systems,
23408 use the operation in a subword and shift/mask to get the proper byte or
23410 if (mode == QImode || mode == HImode)
23412 if (TARGET_SYNC_HI_QI)
23414 val = convert_modes (SImode, mode, val, 1);
23416 /* Prepare to adjust the return value. */
23417 before = gen_reg_rtx (SImode);
23419 after = gen_reg_rtx (SImode);
23424 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23426 /* Shift and mask VAL into position with the word. */
23427 val = convert_modes (SImode, mode, val, 1);
23428 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23429 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* How VAL must be adjusted so the SImode operation does not disturb the
   bits outside the subword depends on CODE (switch scaffolding elided).  */
23435 /* We've already zero-extended VAL. That is sufficient to
23436 make certain that it does not affect other bits. */
23441 /* If we make certain that all of the other bits in VAL are
23442 set, that will be sufficient to not affect other bits. */
23443 x = gen_rtx_NOT (SImode, mask);
23444 x = gen_rtx_IOR (SImode, x, val);
23445 emit_insn (gen_rtx_SET (val, x));
23452 /* These will all affect bits outside the field and need
23453 adjustment via MASK within the loop. */
23457 gcc_unreachable ();
23460 /* Prepare to adjust the return value. */
23461 before = gen_reg_rtx (SImode);
23463 after = gen_reg_rtx (SImode);
23464 store_mode = mode = SImode;
23468 mem = rs6000_pre_atomic_barrier (mem, model);
/* Load-locked / operate / store-conditional retry loop.  */
23470 label = gen_label_rtx ();
23471 emit_label (label);
23472 label = gen_rtx_LABEL_REF (VOIDmode, label);
23474 if (before == NULL_RTX)
23475 before = gen_reg_rtx (mode);
23477 emit_load_locked (mode, before, mem);
/* NOT (i.e. NAND-style) is special-cased: AND then complement.  */
23481 x = expand_simple_binop (mode, AND, before, val,
23482 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23483 after = expand_simple_unop (mode, NOT, x, after, 1);
23487 after = expand_simple_binop (mode, code, before, val,
23488 after, 1, OPTAB_LIB_WIDEN);
/* Subword case: splice the computed subword back into the full word.  */
23494 x = expand_simple_binop (SImode, AND, after, mask,
23495 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23496 x = rs6000_mask_atomic_subword (before, x, mask);
23498 else if (store_mode != mode)
23499 x = convert_modes (store_mode, mode, x, 1);
23501 cond = gen_reg_rtx (CCmode);
23502 emit_store_conditional (store_mode, cond, mem, x);
23504 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23505 emit_unlikely_jump (x, label);
23507 rs6000_post_atomic_barrier (model);
23511 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23512 then do the calculations in a SImode register. */
23514 rs6000_finish_atomic_subword (orig_before, before, shift);
23516 rs6000_finish_atomic_subword (orig_after, after, shift);
23518 else if (store_mode != mode)
23520 /* QImode/HImode on machines with lbarx/lharx where we do the native
23521 operation and then do the calculations in a SImode register. */
23523 convert_move (orig_before, before, 1);
23525 convert_move (orig_after, after, 1);
23527 else if (orig_after && after != orig_after)
23528 emit_move_insn (orig_after, after);
23531 /* Emit instructions to move SRC to DST. Called by splitters for
23532 multi-register moves. It will emit at most one instruction for
23533 each register that is accessed; that is, it won't emit li/lis pairs
23534 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23538 rs6000_split_multireg_move (rtx dst, rtx src)
23540 /* The register number of the first register being moved. */
23542 /* The mode that is to be moved. */
23544 /* The mode that the move is being done in, and its size. */
23545 machine_mode reg_mode;
23547 /* The number of registers that will be moved. */
23550 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23551 mode = GET_MODE (dst);
23552 nregs = hard_regno_nregs (reg, mode);
23553 if (FP_REGNO_P (reg))
23554 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23555 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23556 else if (ALTIVEC_REGNO_P (reg))
23557 reg_mode = V16QImode;
23559 reg_mode = word_mode;
23560 reg_mode_size = GET_MODE_SIZE (reg_mode);
23562 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23564 /* TDmode residing in FP registers is special, since the ISA requires that
23565 the lower-numbered word of a register pair is always the most significant
23566 word, even in little-endian mode. This does not match the usual subreg
23567 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
23568 the appropriate constituent registers "by hand" in little-endian mode.
23570 Note we do not need to check for destructive overlap here since TDmode
23571 can only reside in even/odd register pairs. */
23572 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23577 for (i = 0; i < nregs; i++)
23579 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23580 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23582 p_src = simplify_gen_subreg (reg_mode, src, mode,
23583 i * reg_mode_size);
23585 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23586 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23588 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23589 i * reg_mode_size);
23591 emit_insn (gen_rtx_SET (p_dst, p_src));
23597 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23599 /* Move register range backwards, if we might have destructive
23602 for (i = nregs - 1; i >= 0; i--)
23603 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23604 i * reg_mode_size),
23605 simplify_gen_subreg (reg_mode, src, mode,
23606 i * reg_mode_size)));
23612 bool used_update = false;
23613 rtx restore_basereg = NULL_RTX;
23615 if (MEM_P (src) && INT_REGNO_P (reg))
23619 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23620 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23623 breg = XEXP (XEXP (src, 0), 0);
23624 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23625 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23626 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23627 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23628 src = replace_equiv_address (src, breg);
23630 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23632 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23634 rtx basereg = XEXP (XEXP (src, 0), 0);
23637 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23638 emit_insn (gen_rtx_SET (ndst,
23639 gen_rtx_MEM (reg_mode,
23641 used_update = true;
23644 emit_insn (gen_rtx_SET (basereg,
23645 XEXP (XEXP (src, 0), 1)));
23646 src = replace_equiv_address (src, basereg);
23650 rtx basereg = gen_rtx_REG (Pmode, reg);
23651 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23652 src = replace_equiv_address (src, basereg);
23656 breg = XEXP (src, 0);
23657 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23658 breg = XEXP (breg, 0);
23660 /* If the base register we are using to address memory is
23661 also a destination reg, then change that register last. */
23663 && REGNO (breg) >= REGNO (dst)
23664 && REGNO (breg) < REGNO (dst) + nregs)
23665 j = REGNO (breg) - REGNO (dst);
23667 else if (MEM_P (dst) && INT_REGNO_P (reg))
23671 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23672 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23675 breg = XEXP (XEXP (dst, 0), 0);
23676 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23677 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23678 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23680 /* We have to update the breg before doing the store.
23681 Use store with update, if available. */
23685 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23686 emit_insn (TARGET_32BIT
23687 ? (TARGET_POWERPC64
23688 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23689 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
23690 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23691 used_update = true;
23694 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23695 dst = replace_equiv_address (dst, breg);
23697 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23698 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23700 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23702 rtx basereg = XEXP (XEXP (dst, 0), 0);
23705 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23706 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23709 used_update = true;
23712 emit_insn (gen_rtx_SET (basereg,
23713 XEXP (XEXP (dst, 0), 1)));
23714 dst = replace_equiv_address (dst, basereg);
23718 rtx basereg = XEXP (XEXP (dst, 0), 0);
23719 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23720 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23722 && REG_P (offsetreg)
23723 && REGNO (basereg) != REGNO (offsetreg));
23724 if (REGNO (basereg) == 0)
23726 rtx tmp = offsetreg;
23727 offsetreg = basereg;
23730 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23731 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23732 dst = replace_equiv_address (dst, basereg);
23735 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23736 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23739 for (i = 0; i < nregs; i++)
23741 /* Calculate index to next subword. */
23746 /* If compiler already emitted move of first word by
23747 store with update, no need to do anything. */
23748 if (j == 0 && used_update)
23751 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23752 j * reg_mode_size),
23753 simplify_gen_subreg (reg_mode, src, mode,
23754 j * reg_mode_size)));
23756 if (restore_basereg != NULL_RTX)
23757 emit_insn (restore_basereg);
23762 /* This page contains routines that are used to determine what the
23763 function prologue and epilogue code will do and write them out. */
/* Determine whether the REG is really used.  Returns true if REG must
   be saved by the prologue.  For most registers this is simply
   "call-saved and ever live", but the PIC offset table register needs
   special-casing because the prologue itself may set it up.  */

save_reg_p (int reg)

  if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)

      /* When calling eh_return, we must return true for all the cases
	 where conditional_register_usage marks the PIC offset reg
	 call used or fixed.  */
      if (crtl->calls_eh_return
	  && ((DEFAULT_ABI == ABI_V4 && flag_pic)
	      || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
	      || (TARGET_TOC && TARGET_MINIMAL_TOC)))

      /* We need to mark the PIC offset register live for the same
	 conditions as it is set up in rs6000_emit_prologue, or
	 otherwise it won't be saved before we clobber it.  */
      if (TARGET_TOC && TARGET_MINIMAL_TOC
	  && !constant_pool_empty_p ())

      /* V.4 PIC / secure-PLT: the register counts as used whenever the
	 dataflow pass has seen it live.  */
      if (DEFAULT_ABI == ABI_V4
	  && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
	  && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))

      /* Darwin PIC: live when the function uses the PIC base.  */
      if (DEFAULT_ABI == ABI_DARWIN
	  && flag_pic && crtl->uses_pic_offset_table)

  /* Default case: save any call-saved register that is ever live.  */
  return !call_used_regs[reg] && df_regs_ever_live_p (reg);
/* Return the first fixed-point register that is required to be
   saved. 32 if none.  */

first_reg_to_save (void)

  /* Find lowest numbered live register.  GPR13..GPR31 are the
     call-saved general-purpose registers scanned here.  */
  for (first_reg = 13; first_reg <= 31; first_reg++)
    if (save_reg_p (first_reg))
/* Similar, for FP regs.  Returns the first FP register (hard regno,
   FP regs start at 32) that must be saved; 64 if none.  */

first_fp_reg_to_save (void)

  /* Find lowest numbered live register.  FPR14 (regno 46) is the
     first call-saved floating-point register.  */
  for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
    if (save_reg_p (first_reg))
/* Similar, for AltiVec regs.  Returns the first AltiVec register that
   must be saved; LAST_ALTIVEC_REGNO + 1 if none.  */

first_altivec_reg_to_save (void)

  /* Stack frame remains as is unless we are in AltiVec ABI.  */
  if (! TARGET_ALTIVEC_ABI)
    return LAST_ALTIVEC_REGNO + 1;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)
    return FIRST_ALTIVEC_REGNO + 20;

  /* Find lowest numbered live register.  V20 (FIRST_ALTIVEC_REGNO + 20)
     is the first call-saved vector register per the ABI.  */
  for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
    if (save_reg_p (i))
/* Return a 32-bit mask of the AltiVec registers we need to set in
   VRSAVE.  Bit n of the return value is 1 if Vn is live.  The MSB in
   the 32-bit word is 0.  */

static unsigned int
compute_vrsave_mask (void)
  unsigned int i, mask = 0;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     call-saved altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)

  /* First, find out if we use _any_ altivec registers.  */
  for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
    if (df_regs_ever_live_p (i))
      mask |= ALTIVEC_REG_BIT (i);

  /* Next, remove the argument registers from the set.  These must
     be in the VRSAVE mask set by the caller, so we don't need to add
     them in again.  More importantly, the mask we compute here is
     used to generate CLOBBERs in the set_vrsave insn, and we do not
     wish the argument registers to die.  */
  for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
    mask &= ~ALTIVEC_REG_BIT (i);

  /* Similarly, remove the return value from the set.  The callback
     is_altivec_return_reg sets *YES when the return value lives in an
     AltiVec register (declaration of YES elided in this view).  */
  diddle_return_value (is_altivec_return_reg, &yes);
    mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
/* For a very restricted set of circumstances, we can cut down the
   size of prologues/epilogues by calling our own save/restore-the-world
   routines (Darwin only).  Decide whether INFO qualifies and fill in
   the world-save related fields.  */

compute_save_world_info (rs6000_stack_t *info)
  /* Candidate only when every register class is saved from its
     ABI-defined first save register and CR is saved too.  */
  info->world_save_p = 1;
    = (WORLD_SAVE_P (info)
       && DEFAULT_ABI == ABI_DARWIN
       && !cfun->has_nonlocal_label
       && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
       && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
       && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
       && info->cr_save_p);

  /* This will not work in conjunction with sibcalls.  Make sure there
     are none.  (This check is expensive, but seldom executed.)  */
  if (WORLD_SAVE_P (info))
      for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
	if (CALL_P (insn) && SIBLING_CALL_P (insn))
	    info->world_save_p = 0;

  if (WORLD_SAVE_P (info))
      /* Even if we're not touching VRsave, make sure there's room on the
	 stack for it, if it looks like we're calling SAVE_WORLD, which
	 will attempt to save it.  */
      info->vrsave_size  = 4;

      /* If we are going to save the world, we need to save the link register too.  */
      info->lr_save_p = 1;

      /* "Save" the VRsave register too if we're saving the world.  */
      if (info->vrsave_mask == 0)
	info->vrsave_mask = compute_vrsave_mask ();

      /* Because the Darwin register save/restore routines only handle
	 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
	 check.  */
      gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
		  && (info->first_altivec_reg_save
		      >= FIRST_SAVED_ALTIVEC_REGNO));
/* Callback for diddle_return_value; sets the bool pointed to by XYES
   when REG is the AltiVec return-value register.  */
is_altivec_return_reg (rtx reg, void *xyes)
  bool *yes = (bool *) xyes;
  if (REGNO (reg) == ALTIVEC_ARG_RETURN)
/* Return whether REG is a global user reg or has been specified by
   -ffixed-REG.  We should not restore these, and so cannot use
   lmw or out-of-line restore functions if there are any.  We also
   can't save them (well, emit frame notes for them), because frame
   unwinding during exception handling will restore saved registers.  */

fixed_reg_p (int reg)
  /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
     backend sets it, overriding anything the user might have given.  */
  if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
      && ((DEFAULT_ABI == ABI_V4 && flag_pic)
	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
	  || (TARGET_TOC && TARGET_MINIMAL_TOC)))

  return fixed_regs[reg];
/* Determine the strategy for saving/restoring registers.  Bit flags
   combined into the value returned by rs6000_savres_strategy.  */

  SAVE_MULTIPLE = 0x1,			/* use stmw for GPR saves */
  SAVE_INLINE_GPRS = 0x2,		/* save GPRs inline, not out-of-line */
  SAVE_INLINE_FPRS = 0x4,		/* save FPRs inline */
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,	/* out-of-line GPR save clobbers LR */
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,	/* out-of-line FPR save clobbers LR */
  SAVE_INLINE_VRS = 0x20,		/* save AltiVec regs inline */
  REST_MULTIPLE = 0x100,		/* use lmw for GPR restores */
  REST_INLINE_GPRS = 0x200,		/* restore GPRs inline */
  REST_INLINE_FPRS = 0x400,		/* restore FPRs inline */
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
  REST_INLINE_VRS = 0x1000		/* restore AltiVec regs inline */
/* Compute the SAVRES_* strategy bit mask for the current function.
   INFO describes the frame; USING_STATIC_CHAIN_P says whether r11 is
   occupied by the static chain.  */
rs6000_savres_strategy (rs6000_stack_t *info,
			bool using_static_chain_p)

  /* Select between in-line and out-of-line save and restore of regs.
     First, all the obvious cases where we don't use out-of-line.  */
  if (crtl->calls_eh_return
      || cfun->machine->ra_need_lr)
    strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
		 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
		 | SAVE_INLINE_VRS | REST_INLINE_VRS);

  /* Nothing to save in a class => trivially "inline".  */
  if (info->first_gp_reg_save == 32)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  if (info->first_fp_reg_save == 64)
    strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;

  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

  /* Define cutoff for using out-of-line functions to save registers.  */
  if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
      if (!optimize_size)
	  strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	  strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

	  /* Prefer out-of-line restore if it will exit.  */
	  if (info->first_fp_reg_save > 61)
	    strategy |= SAVE_INLINE_FPRS;
	  if (info->first_gp_reg_save > 29)
	      if (info->first_fp_reg_save == 64)
		strategy |= SAVE_INLINE_GPRS;
		strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
	    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
  else if (DEFAULT_ABI == ABI_DARWIN)
      if (info->first_fp_reg_save > 60)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      if (info->first_gp_reg_save > 29)
	strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

      gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
	  || info->first_fp_reg_save > 61)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

  /* Don't bother to try to save things out-of-line if r11 is occupied
     by the static chain.  It would require too much fiddling and the
     static chain is rarely used anyway.  FPRs are saved w.r.t the stack
     pointer on Darwin, and AIX uses r1 or r12.  */
  if (using_static_chain_p
      && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
    strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
		 | SAVE_INLINE_VRS);

  /* Don't ever restore fixed regs.  That means we can't use the
     out-of-line register restore functions if a fixed reg is in the
     range of regs restored.  */
  if (!(strategy & REST_INLINE_FPRS))
    for (int i = info->first_fp_reg_save; i < 64; i++)
	  strategy |= REST_INLINE_FPRS;

  /* We can only use the out-of-line routines to restore fprs if we've
     saved all the registers from first_fp_reg_save in the prologue.
     Otherwise, we risk loading garbage.  Of course, if we have saved
     out-of-line then we know we haven't skipped any fprs.  */
  if ((strategy & SAVE_INLINE_FPRS)
      && !(strategy & REST_INLINE_FPRS))
    for (int i = info->first_fp_reg_save; i < 64; i++)
      if (!save_reg_p (i))
	  strategy |= REST_INLINE_FPRS;

  /* Similarly, for altivec regs.  */
  if (!(strategy & REST_INLINE_VRS))
    for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
	  strategy |= REST_INLINE_VRS;

  if ((strategy & SAVE_INLINE_VRS)
      && !(strategy & REST_INLINE_VRS))
    for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
      if (!save_reg_p (i))
	  strategy |= REST_INLINE_VRS;

  /* info->lr_save_p isn't yet set if the only reason lr needs to be
     saved is an out-of-line save or restore.  Set up the value for
     the next test (excluding out-of-line gprs).  */
  bool lr_save_p = (info->lr_save_p
		    || !(strategy & SAVE_INLINE_FPRS)
		    || !(strategy & SAVE_INLINE_VRS)
		    || !(strategy & REST_INLINE_FPRS)
		    || !(strategy & REST_INLINE_VRS));

  /* 32-bit stmw/lmw consideration (TARGET_MULTIPLE).  */
  if (TARGET_MULTIPLE
      && !TARGET_POWERPC64
      && info->first_gp_reg_save < 31
      && !(flag_shrink_wrap
	   && flag_shrink_wrap_separate
	   && optimize_function_for_speed_p (cfun)))
      for (int i = info->first_gp_reg_save; i < 32; i++)
	if (save_reg_p (i))

	  /* Don't use store multiple if only one reg needs to be
	     saved.  This can occur for example when the ABI_V4 pic reg
	     (r30) needs to be saved to make calls, but r31 is not
	     used.  */
	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

	  /* Prefer store multiple for saves over out-of-line
	     routines, since the store-multiple instruction will
	     always be smaller.  */
	  strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;

	  /* The situation is more complicated with load multiple.
	     We'd prefer to use the out-of-line routines for restores,
	     since the "exit" out-of-line routines can handle the
	     restore of LR and the frame teardown.  However it doesn't
	     make sense to use the out-of-line routine if that is the
	     only reason we'd need to save LR, and we can't use the
	     "exit" out-of-line gpr restore if we have saved some
	     fprs; In those cases it is advantageous to use load
	     multiple when available.  */
	  if (info->first_fp_reg_save != 64 || !lr_save_p)
	    strategy |= REST_INLINE_GPRS | REST_MULTIPLE;

  /* Using the "exit" out-of-line routine does not improve code size
     if using it would require lr to be saved and if only saving one
     or two gprs.  */
  else if (!lr_save_p && info->first_gp_reg_save > 29)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  /* Don't ever restore fixed regs.  */
  if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
    for (int i = info->first_gp_reg_save; i < 32; i++)
      if (fixed_reg_p (i))
	  strategy |= REST_INLINE_GPRS;
	  strategy &= ~REST_MULTIPLE;

  /* We can only use load multiple or the out-of-line routines to
     restore gprs if we've saved all the registers from
     first_gp_reg_save.  Otherwise, we risk loading garbage.
     Of course, if we have saved out-of-line or used stmw then we know
     we haven't skipped any gprs.  */
  if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
      && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
    for (int i = info->first_gp_reg_save; i < 32; i++)
      if (!save_reg_p (i))
	  strategy |= REST_INLINE_GPRS;
	  strategy &= ~REST_MULTIPLE;

  /* Record which out-of-line routines clobber/restore LR, per target.  */
  if (TARGET_ELF && TARGET_64BIT)
      if (!(strategy & SAVE_INLINE_FPRS))
	strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
      else if (!(strategy & SAVE_INLINE_GPRS)
	       && info->first_fp_reg_save == 64)
	strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
  else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
    strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;

  if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
    strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24216 /* Calculate the stack information for the current function. This is
24217 complicated by having two separate calling sequences, the AIX calling
24218 sequence and the V.4 calling sequence.
24220 AIX (and Darwin/Mac OS X) stack frames look like:
24222 SP----> +---------------------------------------+
24223 | back chain to caller | 0 0
24224 +---------------------------------------+
24225 | saved CR | 4 8 (8-11)
24226 +---------------------------------------+
24228 +---------------------------------------+
24229 | reserved for compilers | 12 24
24230 +---------------------------------------+
24231 | reserved for binders | 16 32
24232 +---------------------------------------+
24233 | saved TOC pointer | 20 40
24234 +---------------------------------------+
24235 | Parameter save area (+padding*) (P) | 24 48
24236 +---------------------------------------+
24237 | Alloca space (A) | 24+P etc.
24238 +---------------------------------------+
24239 | Local variable space (L) | 24+P+A
24240 +---------------------------------------+
24241 | Float/int conversion temporary (X) | 24+P+A+L
24242 +---------------------------------------+
24243 | Save area for AltiVec registers (W) | 24+P+A+L+X
24244 +---------------------------------------+
24245 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24246 +---------------------------------------+
24247 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24248 +---------------------------------------+
	| Save area for GP registers (G)	| 24+P+A+L+X+W+Y+Z
24250 +---------------------------------------+
	| Save area for FP registers (F)	| 24+P+A+L+X+W+Y+Z+G
24252 +---------------------------------------+
24253 old SP->| back chain to caller's caller |
24254 +---------------------------------------+
24256 * If the alloca area is present, the parameter save area is
24257 padded so that the former starts 16-byte aligned.
24259 The required alignment for AIX configurations is two words (i.e., 8
24262 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24264 SP----> +---------------------------------------+
24265 | Back chain to caller | 0
24266 +---------------------------------------+
24267 | Save area for CR | 8
24268 +---------------------------------------+
24270 +---------------------------------------+
24271 | Saved TOC pointer | 24
24272 +---------------------------------------+
24273 | Parameter save area (+padding*) (P) | 32
24274 +---------------------------------------+
24275 | Alloca space (A) | 32+P
24276 +---------------------------------------+
24277 | Local variable space (L) | 32+P+A
24278 +---------------------------------------+
24279 | Save area for AltiVec registers (W) | 32+P+A+L
24280 +---------------------------------------+
24281 | AltiVec alignment padding (Y) | 32+P+A+L+W
24282 +---------------------------------------+
24283 | Save area for GP registers (G) | 32+P+A+L+W+Y
24284 +---------------------------------------+
24285 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24286 +---------------------------------------+
24287 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24288 +---------------------------------------+
24290 * If the alloca area is present, the parameter save area is
24291 padded so that the former starts 16-byte aligned.
24293 V.4 stack frames look like:
24295 SP----> +---------------------------------------+
24296 | back chain to caller | 0
24297 +---------------------------------------+
24298 | caller's saved LR | 4
24299 +---------------------------------------+
24300 | Parameter save area (+padding*) (P) | 8
24301 +---------------------------------------+
24302 | Alloca space (A) | 8+P
24303 +---------------------------------------+
24304 | Varargs save area (V) | 8+P+A
24305 +---------------------------------------+
24306 | Local variable space (L) | 8+P+A+V
24307 +---------------------------------------+
24308 | Float/int conversion temporary (X) | 8+P+A+V+L
24309 +---------------------------------------+
24310 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24311 +---------------------------------------+
24312 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24313 +---------------------------------------+
24314 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24315 +---------------------------------------+
24316 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24317 +---------------------------------------+
24318 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24319 +---------------------------------------+
24320 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24321 +---------------------------------------+
24322 old SP->| back chain to caller's caller |
24323 +---------------------------------------+
24325 * If the alloca area is present and the required alignment is
24326 16 bytes, the parameter save area is padded so that the
24327 alloca area starts 16-byte aligned.
24329 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24330 given. (But note below and in sysv4.h that we require only 8 and
24331 may round up the size of our stack frame anyways. The historical
24332 reason is early versions of powerpc-linux which didn't properly
24333 align the stack at program startup. A happy side-effect is that
24334 -mno-eabi libraries can be used with -meabi programs.)
24336 The EABI configuration defaults to the V.4 layout. However,
24337 the stack alignment requirements may differ. If -mno-eabi is not
24338 given, the required stack alignment is 8 bytes; if -mno-eabi is
24339 given, the required alignment is 16 bytes. (But see V.4 comment
24342 #ifndef ABI_STACK_BOUNDARY
24343 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
/* Compute (and cache in stack_info) the rs6000_stack_t description of
   the current function's frame: which registers are saved, the sizes
   of each save area, and the offsets of each area from the incoming
   stack pointer, per the ABI layouts documented above.  */
static rs6000_stack_t *
rs6000_stack_info (void)
  /* We should never be called for thunks, we are not set up for that.  */
  gcc_assert (!cfun->is_thunk);

  rs6000_stack_t *info = &stack_info;
  int reg_size = TARGET_32BIT ? 4 : 8;
  HOST_WIDE_INT non_fixed_size;
  bool using_static_chain_p;

  /* Reuse the cached answer once reload has finished (frame is stable).  */
  if (reload_completed && info->reload_completed)

  memset (info, 0, sizeof (*info));
  info->reload_completed = reload_completed;

  /* Select which calling sequence.  */
  info->abi = DEFAULT_ABI;

  /* Calculate which registers need to be saved & save area size.  */
  info->first_gp_reg_save = first_reg_to_save ();
  /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
     even if it currently looks like we won't.  Reload may need it to
     get at a constant; if so, it will have already created a constant
     pool entry for it.  */
  if (((TARGET_TOC && TARGET_MINIMAL_TOC)
       || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
       || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
      && crtl->uses_const_pool
      && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
    first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
    first_gp = info->first_gp_reg_save;

  info->gp_size = reg_size * (32 - first_gp);

  info->first_fp_reg_save = first_fp_reg_to_save ();
  /* FP registers are always saved as 8-byte doubles.  */
  info->fp_size = 8 * (64 - info->first_fp_reg_save);

  info->first_altivec_reg_save = first_altivec_reg_to_save ();
  /* AltiVec registers are 16 bytes each.  */
  info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
			     - info->first_altivec_reg_save);

  /* Does this function call anything?  */
  info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);

  /* Determine if we need to save the condition code registers.  */
  if (save_reg_p (CR2_REGNO)
      || save_reg_p (CR3_REGNO)
      || save_reg_p (CR4_REGNO))
      info->cr_save_p = 1;
      if (DEFAULT_ABI == ABI_V4)
	info->cr_size = reg_size;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (crtl->calls_eh_return)
      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
      ehrd_size = i * UNITS_PER_WORD;

  /* In the ELFv2 ABI, we also need to allocate space for separate
     CR field save areas if the function calls __builtin_eh_return.  */
  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
      /* This hard-codes that we have three call-saved CR fields.  */
      ehcr_size = 3 * reg_size;
      /* We do *not* use the regular CR save mechanism.  */
      info->cr_save_p = 0;

  /* Determine various sizes.  */
  info->reg_size = reg_size;
  info->fixed_size = RS6000_SAVE_AREA;
  info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
  if (cfun->calls_alloca)
      RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
		    STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
    info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
				    TARGET_ALTIVEC ? 16 : 8);
  if (FRAME_GROWS_DOWNWARD)
    += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
		     ABI_STACK_BOUNDARY / BITS_PER_UNIT)
       - (info->fixed_size + info->vars_size + info->parm_size);

  if (TARGET_ALTIVEC_ABI)
    info->vrsave_mask = compute_vrsave_mask ();

  if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
    info->vrsave_size = 4;

  compute_save_world_info (info);

  /* Calculate the offsets.  */
  switch (DEFAULT_ABI)
      gcc_unreachable ();

      /* AIX/ELFv2-style layout: FP save area highest, then GP, below
	 the incoming stack pointer (negative offsets).  */
      info->fp_save_offset = -info->fp_size;
      info->gp_save_offset = info->fp_save_offset - info->gp_size;

      if (TARGET_ALTIVEC_ABI)
	  info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;

	  /* Align stack so vector save area is on a quadword boundary.
	     The padding goes above the vectors.  */
	  if (info->altivec_size != 0)
	    info->altivec_padding_size = info->vrsave_save_offset & 0xF;

	  info->altivec_save_offset = info->vrsave_save_offset
	    - info->altivec_padding_size
	    - info->altivec_size;
	  gcc_assert (info->altivec_size == 0
		      || info->altivec_save_offset % 16 == 0);

	  /* Adjust for AltiVec case.  */
	  info->ehrd_offset = info->altivec_save_offset - ehrd_size;
	info->ehrd_offset = info->gp_save_offset - ehrd_size;

      info->ehcr_offset = info->ehrd_offset - ehcr_size;
      info->cr_save_offset = reg_size; /* first word when 64-bit.  */
      info->lr_save_offset = 2*reg_size;

      /* V.4-style layout: CR saved below GP save area.  */
      info->fp_save_offset = -info->fp_size;
      info->gp_save_offset = info->fp_save_offset - info->gp_size;
      info->cr_save_offset = info->gp_save_offset - info->cr_size;

      if (TARGET_ALTIVEC_ABI)
	  info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;

	  /* Align stack so vector save area is on a quadword boundary.  */
	  if (info->altivec_size != 0)
	    info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);

	  info->altivec_save_offset = info->vrsave_save_offset
	    - info->altivec_padding_size
	    - info->altivec_size;

	  /* Adjust for AltiVec case.  */
	  info->ehrd_offset = info->altivec_save_offset;
	info->ehrd_offset = info->cr_save_offset;

      info->ehrd_offset -= ehrd_size;
      info->lr_save_offset = reg_size;

  save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
  info->save_size = RS6000_ALIGN (info->fp_size
				  + info->altivec_size
				  + info->altivec_padding_size
				  + info->vrsave_size,

  non_fixed_size = info->vars_size + info->parm_size + info->save_size;

  info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
				   ABI_STACK_BOUNDARY / BITS_PER_UNIT);

  /* Determine if we need to save the link register.  */
      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	  && !TARGET_PROFILE_KERNEL)
      || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
#ifdef TARGET_RELOCATABLE
      || (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && !constant_pool_empty_p ())
      || rs6000_ra_ever_killed ())
    info->lr_save_p = 1;

  using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
			  && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
			  && call_used_regs[STATIC_CHAIN_REGNUM]);
  info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);

  /* Any out-of-line save/restore routine forces an LR save.  */
  if (!(info->savres_strategy & SAVE_INLINE_GPRS)
      || !(info->savres_strategy & SAVE_INLINE_FPRS)
      || !(info->savres_strategy & SAVE_INLINE_VRS)
      || !(info->savres_strategy & REST_INLINE_GPRS)
      || !(info->savres_strategy & REST_INLINE_FPRS)
      || !(info->savres_strategy & REST_INLINE_VRS))
    info->lr_save_p = 1;

  if (info->lr_save_p)
    df_set_regs_ever_live (LR_REGNO, true);

  /* Determine if we need to allocate any stack frame:

     For AIX we need to push the stack if a frame pointer is needed
     (because the stack might be dynamically adjusted), if we are
     debugging, if we make calls, or if the sum of fp_save, gp_save,
     and local variables are more than the space needed to save all
     non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
     + 18*8 = 288 (GPR13 reserved).

     For V.4 we don't have the stack cushion that AIX uses, but assume
     that the debugger can handle stackless frames.  */

  else if (DEFAULT_ABI == ABI_V4)
    info->push_p = non_fixed_size != 0;

  else if (frame_pointer_needed)

  else if (TARGET_XCOFF && write_symbols != NO_DEBUG)

    info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
/* Dump a human-readable description of the stack frame INFO to stderr
   for debugging.  If INFO is NULL, compute it for the current
   function via rs6000_stack_info.  Only nonzero/meaningful fields
   are printed.  */
debug_stack_info (rs6000_stack_t *info)
  const char *abi_string;

    info = rs6000_stack_info ();

  fprintf (stderr, "\nStack information for function %s:\n",
	   ((current_function_decl && DECL_NAME (current_function_decl))
	    ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))

    default:		 abi_string = "Unknown";	break;
    case ABI_NONE:	 abi_string = "NONE";		break;
    case ABI_AIX:	 abi_string = "AIX";		break;
    case ABI_ELFv2:	 abi_string = "ELFv2";		break;
    case ABI_DARWIN:	 abi_string = "Darwin";		break;
    case ABI_V4:	 abi_string = "V.4";		break;

  fprintf (stderr, "\tABI                 = %5s\n", abi_string);

  if (TARGET_ALTIVEC_ABI)
    fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");

  if (info->first_gp_reg_save != 32)
    fprintf (stderr, "\tfirst_gp_reg_save   = %5d\n", info->first_gp_reg_save);

  if (info->first_fp_reg_save != 64)
    fprintf (stderr, "\tfirst_fp_reg_save   = %5d\n", info->first_fp_reg_save);

  if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
    fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
	     info->first_altivec_reg_save);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_p           = %5d\n", info->lr_save_p);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_p           = %5d\n", info->cr_save_p);

  if (info->vrsave_mask)
    fprintf (stderr, "\tvrsave_mask         = 0x%x\n", info->vrsave_mask);

    fprintf (stderr, "\tpush_p              = %5d\n", info->push_p);

    fprintf (stderr, "\tcalls_p             = %5d\n", info->calls_p);

    fprintf (stderr, "\tgp_save_offset      = %5d\n", info->gp_save_offset);

    fprintf (stderr, "\tfp_save_offset      = %5d\n", info->fp_save_offset);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_save_offset = %5d\n",
	     info->altivec_save_offset);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_save_offset  = %5d\n",
	     info->vrsave_save_offset);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_offset      = %5d\n", info->lr_save_offset);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_offset      = %5d\n", info->cr_save_offset);

  if (info->varargs_save_offset)
    fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);

  if (info->total_size)
    fprintf (stderr, "\ttotal_size          = " HOST_WIDE_INT_PRINT_DEC"\n",

  if (info->vars_size)
    fprintf (stderr, "\tvars_size           = " HOST_WIDE_INT_PRINT_DEC"\n",

  if (info->parm_size)
    fprintf (stderr, "\tparm_size           = %5d\n", info->parm_size);

  if (info->fixed_size)
    fprintf (stderr, "\tfixed_size          = %5d\n", info->fixed_size);

    fprintf (stderr, "\tgp_size             = %5d\n", info->gp_size);

    fprintf (stderr, "\tfp_size             = %5d\n", info->fp_size);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_size        = %5d\n", info->altivec_size);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_size         = %5d\n", info->vrsave_size);

  if (info->altivec_padding_size)
    fprintf (stderr, "\taltivec_padding_size= %5d\n",
	     info->altivec_padding_size);

    fprintf (stderr, "\tcr_size             = %5d\n", info->cr_size);

  if (info->save_size)
    fprintf (stderr, "\tsave_size           = %5d\n", info->save_size);

  if (info->reg_size != 4)
    fprintf (stderr, "\treg_size            = %5d\n", info->reg_size);

  fprintf (stderr, "\tsave-strategy       =  %04x\n", info->savres_strategy);

  if (info->abi == ABI_DARWIN)
    fprintf (stderr, "\tWORLD_SAVE_P        = %5d\n", WORLD_SAVE_P(info));

  fprintf (stderr, "\n");
/* Return an rtx for the return address COUNT frames up from the current
   one (__builtin_return_address support).  FRAME is supplied by generic
   code.  NOTE(review): this excerpt is missing lines (return type, braces
   and the COUNT test), so only the visible logic is documented.  */
24724 rs6000_return_addr (int count, rtx frame)
24726 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
24727 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
24729 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
/* Walking up the frame chain requires a full frame to exist.  */
24731 cfun->machine->ra_needs_full_frame = 1;
24734 /* FRAME is set to frame_pointer_rtx by the generic code, but that
24735 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
24736 frame = stack_pointer_rtx;
/* Load the caller's back-chain pointer, then form the address of the
   LR save slot within that frame and return a MEM referencing it.  */
24737 rtx prev_frame_addr = memory_address (Pmode, frame);
24738 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
24739 rtx lr_save_off = plus_constant (Pmode,
24740 prev_frame, RETURN_ADDRESS_OFFSET);
24741 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
24742 return gen_rtx_MEM (Pmode, lr_save_addr);
/* Simple case: the return address is (the incoming value of) LR.  */
24745 cfun->machine->ra_need_lr = 1;
24746 return get_hard_reg_initial_val (Pmode, LR_REGNO);
24749 /* Say whether a function is a candidate for sibcall handling or not. */
/* DECL is the FUNCTION_DECL of the callee (may be NULL for an indirect
   call); EXP is the CALL_EXPR.  NOTE(review): lines are missing from
   this excerpt (return type, braces, several conditions); comments
   below cover only the visible logic.  */
24752 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24756 /* The sibcall epilogue may clobber the static chain register.
24757 ??? We could work harder and avoid that, but it's probably
24758 not worth the hassle in practice. */
24759 if (CALL_EXPR_STATIC_CHAIN (exp))
/* Get the callee's function type, from DECL if known, otherwise from
   the call expression's function pointer.  */
24763 fntype = TREE_TYPE (decl);
24765 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24767 /* We can't do it if the called function has more vector parameters
24768 than the current function; there's nowhere to put the VRsave code. */
24769 if (TARGET_ALTIVEC_ABI
24770 && TARGET_ALTIVEC_VRSAVE
24771 && !(decl && decl == current_function_decl))
24773 function_args_iterator args_iter;
24777 /* Functions with vector parameters are required to have a
24778 prototype, so the argument type info must be available
/* Scan the callee's parameters for AltiVec/VSX vector types ...  */
24780 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
24781 if (TREE_CODE (type) == VECTOR_TYPE
24782 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
/* ... and likewise the current function's own parameters.  */
24785 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
24786 if (TREE_CODE (type) == VECTOR_TYPE
24787 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24794 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24795 functions, because the callee may have a different TOC pointer to
24796 the caller and there's no way to ensure we restore the TOC when
24797 we return. With the secure-plt SYSV ABI we can't make non-local
24798 calls when -fpic/PIC because the plt call stubs use r30. */
24799 if (DEFAULT_ABI == ABI_DARWIN
24800 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24802 && !DECL_EXTERNAL (decl)
24803 && !DECL_WEAK (decl)
24804 && (*targetm.binds_local_p) (decl))
24805 || (DEFAULT_ABI == ABI_V4
24806 && (!TARGET_SECURE_PLT
24809 && (*targetm.binds_local_p) (decl)))))
/* A "longcall" (without an overriding "shortcall") attribute forces a
   call through a pointer, which rules out a sibcall.  */
24811 tree attr_list = TYPE_ATTRIBUTES (fntype);
24813 if (!lookup_attribute ("longcall", attr_list)
24814 || lookup_attribute ("shortcall", attr_list))
/* Return nonzero if the link register may be clobbered somewhere in the
   current function's body (outside the prologue/epilogue), by scanning
   the complete insn stream.  NOTE(review): several lines (return type,
   braces, local declarations) are missing from this excerpt.  */
24822 rs6000_ra_ever_killed (void)
24828 if (cfun->is_thunk)
/* lr_save_state, when set, caches the answer (stored as value + 1);
   see rs6000_emit_eh_reg_restore which freezes it.  */
24831 if (cfun->machine->lr_save_state)
24832 return cfun->machine->lr_save_state - 1;
24834 /* regs_ever_live has LR marked as used if any sibcalls are present,
24835 but this should not force saving and restoring in the
24836 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24837 clobbers LR, so that is inappropriate. */
24839 /* Also, the prologue can generate a store into LR that
24840 doesn't really count, like this:
24843 bcl to set PIC register
24847 When we're called from the epilogue, we need to avoid counting
24848 this as a store. */
/* Grab the outermost insn sequence even if we are inside a nested one.  */
24850 push_topmost_sequence ();
24851 top = get_insns ();
24852 pop_topmost_sequence ();
24853 reg = gen_rtx_REG (Pmode, LR_REGNO);
24855 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
/* Sibling calls don't count as killing LR; prologue/epilogue stores
   of LR are also excluded.  */
24861 if (!SIBLING_CALL_P (insn))
24864 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24866 else if (set_of (reg, insn) != NULL_RTX
24867 && !prologue_epilogue_contains (insn))
24874 /* Emit instructions needed to load the TOC register.
24875 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
24876 a constant pool; or for SVR4 -fpic. */
/* FROMPROLOG is nonzero when called while emitting the prologue, in
   which case only fixed registers (r0) may be used as temporaries.
   NOTE(review): this excerpt is missing lines (return type, braces,
   some declarations such as the label-name buffer).  */
24879 rs6000_emit_load_toc_table (int fromprolog)
24882 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
/* Case 1: secure-PLT SysV PIC.  */
24884 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24887 rtx lab, tmp1, tmp2, got;
24889 lab = gen_label_rtx ();
24890 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24891 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24894 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name))
24898 got = rs6000_got_sym ();
/* In the prologue we must compute in DEST itself; otherwise use fresh
   pseudos for the intermediate values.  */
24899 tmp1 = tmp2 = dest;
24902 tmp1 = gen_reg_rtx (Pmode);
24903 tmp2 = gen_reg_rtx (Pmode);
24905 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24906 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24907 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24908 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
/* Case 2: SysV -fpic (small PIC model).  */
24910 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24912 emit_insn (gen_load_toc_v4_pic_si ());
24913 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
/* Case 3: SysV -fPIC (large PIC model).  */
24915 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24918 rtx temp0 = (fromprolog
24919 ? gen_rtx_REG (Pmode, 0)
24920 : gen_reg_rtx (Pmode));
24926 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24927 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24929 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24930 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24932 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24933 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24934 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24940 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24942 lab = gen_label_rtx ();
24943 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24944 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
/* With -mlink-stack the bcl/mflr pair is 4 bytes longer; compensate.  */
24945 if (TARGET_LINK_STACK)
24946 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24947 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24949 emit_insn (gen_addsi3 (dest, temp0, dest));
24951 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24953 /* This is for AIX code running in non-PIC ELF32. */
24954 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24957 emit_insn (gen_elf_high (dest, realsym));
24958 emit_insn (gen_elf_low (dest, dest, realsym));
/* Case 5: AIX/ELFv2 — load the TOC anchor with a single pattern,
   SImode or DImode depending on pointer width.  */
24962 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24965 emit_insn (gen_load_toc_aix_si (dest));
24967 emit_insn (gen_load_toc_aix_di (dest));
24971 /* Emit instructions to restore the link register after determining where
24972 its value has been stored. */
/* SOURCE holds the value to restore into LR (or its save slot);
   SCRATCH is a register we may clobber while forming addresses.
   NOTE(review): braces and some lines are missing from this excerpt.  */
24975 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24977 rs6000_stack_t *info = rs6000_stack_info ();
24980 operands[0] = source;
24981 operands[1] = scratch;
/* If LR is saved in the frame, store SOURCE into its save slot.  */
24983 if (info->lr_save_p)
24985 rtx frame_rtx = stack_pointer_rtx;
24986 HOST_WIDE_INT sp_offset = 0;
/* For large frames (or fp/alloca frames) the slot is not reachable
   with a 16-bit offset from r1; reload the frame base into SCRATCH.  */
24989 if (frame_pointer_needed
24990 || cfun->calls_alloca
24991 || info->total_size > 32767)
24993 tmp = gen_frame_mem (Pmode, frame_rtx);
24994 emit_move_insn (operands[1], tmp);
24995 frame_rtx = operands[1];
24997 else if (info->push_p)
24998 sp_offset = info->total_size;
25000 tmp = plus_constant (Pmode, frame_rtx,
25001 info->lr_save_offset + sp_offset);
25002 tmp = gen_frame_mem (Pmode, tmp);
25003 emit_move_insn (tmp, operands[0]);
/* Otherwise LR lives only in the register; just set it.  */
25006 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25008 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25009 state of lr_save_p so any change from here on would be a bug. In
25010 particular, stop rs6000_ra_ever_killed from considering the SET
25011 of lr we may have added just above. */
25012 cfun->machine->lr_save_state = info->lr_save_p + 1;
/* Lazily-created alias set used for all TOC references; -1 means "not
   yet allocated".  GTY(()) keeps it across garbage collections.  */
25015 static GTY(()) alias_set_type set = -1;
/* Return the TOC alias set, allocating it on first use.  NOTE(review):
   the guard test and return statement are not visible in this excerpt.  */
25018 get_TOC_alias_set (void)
25021 set = new_alias_set ();
25025 /* This returns nonzero if the current function uses the TOC. This is
25026 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25027 is generated by the ABI_V4 load_toc_* patterns.
25028 Return 2 instead of 1 if the load_toc_* pattern is in the function
25029 partition that doesn't start the function. */
/* NOTE(review): the function signature line is missing from this
   excerpt (presumably `uses_TOC` — confirm against the full file).
   Scans every insn looking for the UNSPEC_TOC marker.  */
25037 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25041 rtx pat = PATTERN (insn);
/* The marker is one of the elements of a PARALLEL.  */
25044 if (GET_CODE (pat) == PARALLEL)
25045 for (i = 0; i < XVECLEN (pat, 0); i++)
25047 rtx sub = XVECEXP (pat, 0, i);
25048 if (GET_CODE (sub) == USE)
25050 sub = XEXP (sub, 0);
25051 if (GET_CODE (sub) == UNSPEC
25052 && XINT (sub, 1) == UNSPEC_TOC)
/* Track crossing into the second hot/cold partition, so the caller
   can tell which section the TOC setup landed in.  */
25057 else if (crtl->has_bb_partition
25059 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
/* Create a TOC-relative reference for SYMBOL, i.e. an UNSPEC_TOCREL
   wrapped in HIGH/LO_SUM addressing.  LARGETOC_REG, if non-NULL, is a
   register to hold the high part for -mcmodel=medium/large.
   NOTE(review): return type, braces and some lines are missing here.  */
25067 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25069 rtx tocrel, tocreg, hi;
25071 if (TARGET_DEBUG_ADDR)
25073 if (SYMBOL_REF_P (symbol))
25074 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25078 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25079 GET_RTX_NAME (GET_CODE (symbol)));
25080 debug_rtx (symbol);
/* After reload, make sure the TOC register is known live.  */
25084 if (!can_create_pseudo_p ())
25085 df_set_regs_ever_live (TOC_REGISTER, true);
25087 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25088 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25089 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
/* Large code model: materialize the high part separately.  */
25092 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25093 if (largetoc_reg != NULL)
25095 emit_move_insn (largetoc_reg, hi);
25098 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
25101 /* Issue assembly directives that create a reference to the given DWARF
25102 FRAME_TABLE_LABEL from the current function section. */
/* Emits an AIX ".ref" directive after stripping any target-specific
   name encoding from the label.  */
25104 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25106 fprintf (asm_out_file, "\t.ref %s\n",
25107 (* targetm.strip_name_encoding) (frame_table_label));
25110 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25111 and the change to the stack pointer. */
/* FP is the frame register in use; HARD_FRAME_NEEDED says whether the
   hard frame pointer must also be tied.  NOTE(review): some lines
   (locals, braces) are missing from this excerpt.  */
25114 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
/* Collect the registers to tie: always r1, plus the hard FP and/or FP
   when they differ from r1.  */
25121 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25122 if (hard_frame_needed)
25123 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25124 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25125 || (hard_frame_needed
25126 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
/* Build a PARALLEL of (set (mem:BLK (reg)) 0) for each register.  */
25129 p = rtvec_alloc (i);
25132 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25133 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25136 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25139 /* Allocate SIZE_INT bytes on the stack using a store with update style insn
25140 and set the appropriate attributes for the generated insn. Return the
25141 first insn which adjusts the stack pointer or the last insn before
25142 the stack adjustment loop.
25144 SIZE_INT is used to create the CFI note for the allocation.
25146 SIZE_RTX is an rtx containing the size of the adjustment. Note that
25147 since stacks grow to lower addresses its runtime value is -SIZE_INT.
25149 ORIG_SP contains the backchain value that must be stored at *sp. */
/* NOTE(review): return type, braces and some lines are missing from
   this excerpt.  */
25152 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
25156 rtx size_rtx = GEN_INT (-size_int);
/* Adjustments too big for a 16-bit displacement must first go through
   a register (r0).  */
25157 if (size_int > 32767)
25159 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25160 /* Need a note here so that try_split doesn't get confused. */
25161 if (get_last_insn () == NULL_RTX)
25162 emit_note (NOTE_INSN_DELETED);
25163 insn = emit_move_insn (tmp_reg, size_rtx);
25164 try_split (PATTERN (insn), insn, 0);
25165 size_rtx = tmp_reg;
/* stwu/stdu: decrement r1 and store the backchain in one insn.  */
25169 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
25174 insn = emit_insn (gen_movdi_update_stack (stack_pointer_rtx,
/* The update pattern is a PARALLEL whose first SET is the store; mark
   its MEM non-trapping and give it the frame alias set.  */
25178 rtx par = PATTERN (insn);
25179 gcc_assert (GET_CODE (par) == PARALLEL);
25180 rtx set = XVECEXP (par, 0, 0);
25181 gcc_assert (GET_CODE (set) == SET);
25182 rtx mem = SET_DEST (set);
25183 gcc_assert (MEM_P (mem));
25184 MEM_NOTRAP_P (mem) = 1;
25185 set_mem_alias_set (mem, get_frame_alias_set ());
/* Tell the CFI machinery this is the r1 -= SIZE_INT adjustment.  */
25187 RTX_FRAME_RELATED_P (insn) = 1;
25188 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25189 gen_rtx_SET (stack_pointer_rtx,
25190 gen_rtx_PLUS (Pmode,
25192 GEN_INT (-size_int))));
25194 /* Emit a blockage to ensure the allocation/probing insns are
25195 not optimized, combined, removed, etc. Add REG_STACK_CHECK
25196 note for similar reasons. */
25197 if (flag_stack_clash_protection)
25199 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
25200 emit_insn (gen_blockage ());
/* Return the stack-clash probe interval in bytes, taken from the
   --param stack-clash-protection-probe-interval (a power of two).  */
25206 static HOST_WIDE_INT
25207 get_stack_clash_protection_probe_interval (void)
25209 return (HOST_WIDE_INT_1U
25210 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
/* Return the stack-clash guard size in bytes, taken from the
   --param stack-clash-protection-guard-size (a power of two).  */
25213 static HOST_WIDE_INT
25214 get_stack_clash_protection_guard_size (void)
25216 return (HOST_WIDE_INT_1U
25217 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
25220 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
25221 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
25223 COPY_REG, if non-null, should contain a copy of the original
25224 stack pointer at exit from this function.
25226 This is subtly different than the Ada probing in that it tries hard to
25227 prevent attacks that jump the stack guard. Thus it is never allowed to
25228 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
25229 space without a suitable probe. */
/* Returns the first stack-adjusting insn (used by the caller for CFI).
   NOTE(review): return type, braces and some lines are missing from
   this excerpt.  */
25231 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
25234 rtx orig_sp = copy_reg;
25236 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25238 /* Round the size down to a multiple of PROBE_INTERVAL. */
25239 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
25241 /* If explicitly requested,
25242 or the rounded size is not the same as the original size
25243 or the the rounded size is greater than a page,
25244 then we will need a copy of the original stack pointer. */
25245 if (rounded_size != orig_size
25246 || rounded_size > probe_interval
25249 /* If the caller did not request a copy of the incoming stack
25250 pointer, then we use r0 to hold the copy. */
25252 orig_sp = gen_rtx_REG (Pmode, 0);
25253 emit_move_insn (orig_sp, stack_pointer_rtx);
25256 /* There's three cases here.
25258 One is a single probe which is the most common and most efficiently
25259 implemented as it does not have to have a copy of the original
25260 stack pointer if there are no residuals.
25262 Second is unrolled allocation/probes which we use if there's just
25263 a few of them. It needs to save the original stack pointer into a
25264 temporary for use as a source register in the allocation/probe.
25266 Last is a loop. This is the most uncommon case and least efficient. */
25267 rtx_insn *retval = NULL;
25268 if (rounded_size == probe_interval)
/* Case 1: exactly one interval — a single store-with-update probes it.  */
25270 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
25272 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
/* Case 2: a small number of intervals — unroll the probes.  */
25274 else if (rounded_size <= 8 * probe_interval)
25276 /* The ABI requires using the store with update insns to allocate
25277 space and store the backchain into the stack
25279 So we save the current stack pointer into a temporary, then
25280 emit the store-with-update insns to store the saved stack pointer
25281 into the right location in each new page. */
25282 for (int i = 0; i < rounded_size; i += probe_interval)
25285 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
25287 /* Save the first stack adjustment in RETVAL. */
25292 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
/* Case 3: emit an allocate/probe loop.  */
25296 /* Compute the ending address. */
25298 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
25299 rtx rs = GEN_INT (-rounded_size);
/* -rounded_size may not fit an addi immediate; load it first if so.  */
25301 if (add_operand (rs, Pmode))
25302 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
25305 emit_move_insn (end_addr, GEN_INT (-rounded_size));
25306 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
25307 stack_pointer_rtx));
25308 /* Describe the effect of INSN to the CFI engine. */
25309 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25310 gen_rtx_SET (end_addr,
25311 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25314 RTX_FRAME_RELATED_P (insn) = 1;
25316 /* Emit the loop. */
25318 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
25319 stack_pointer_rtx, orig_sp,
25322 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
25323 stack_pointer_rtx, orig_sp,
25325 RTX_FRAME_RELATED_P (retval) = 1;
25326 /* Describe the effect of INSN to the CFI engine. */
25327 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
25328 gen_rtx_SET (stack_pointer_rtx, end_addr));
25330 /* Emit a blockage to ensure the allocation/probing insns are
25331 not optimized, combined, removed, etc. Other cases handle this
25332 within their call to rs6000_emit_allocate_stack_1. */
25333 emit_insn (gen_blockage ());
25335 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
/* Finally handle any residual allocation below a full interval.  */
25338 if (orig_size != rounded_size)
25340 /* Allocate (and implicitly probe) any residual space. */
25341 HOST_WIDE_INT residual = orig_size - rounded_size;
25343 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
25345 /* If the residual was the only allocation, then we can return the
25346 allocating insn. */
25354 /* Emit the correct code for allocating stack space, as insns.
25355 If COPY_REG, make sure a copy of the old frame is left there.
25356 The generated code may use hard register 0 as a temporary. */
/* COPY_OFF, when nonzero, is added to the saved stack pointer placed
   in COPY_REG.  NOTE(review): return type, braces and some lines are
   missing from this excerpt.  */
25359 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25362 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25363 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25364 rtx todec = gen_int_mode (-size, Pmode);
/* Detect overflow of the frame size in Pmode.  */
25366 if (INTVAL (todec) != -size)
25368 warning (0, "stack frame too large");
25369 emit_insn (gen_trap ());
/* -fstack-limit support: trap when r1 would drop below the limit.  */
25373 if (crtl->limit_stack)
25375 if (REG_P (stack_limit_rtx)
25376 && REGNO (stack_limit_rtx) > 1
25377 && REGNO (stack_limit_rtx) <= 31)
25380 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25383 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
/* A symbolic stack limit is only supported for V4 small-model code.  */
25385 else if (SYMBOL_REF_P (stack_limit_rtx)
25387 && DEFAULT_ABI == ABI_V4
25390 rtx toload = gen_rtx_CONST (VOIDmode,
25391 gen_rtx_PLUS (Pmode,
25395 emit_insn (gen_elf_high (tmp_reg, toload));
25396 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25397 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25401 warning (0, "stack limit expression is not supported");
/* Stack-clash path: delegate to the probing allocator.  */
25404 if (flag_stack_clash_protection)
25406 if (size < get_stack_clash_protection_guard_size ())
25407 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25410 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25413 /* If we asked for a copy with an offset, then we still need add in
25415 if (copy_reg && copy_off)
25416 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
/* Non-clash path: save old r1 (plus optional offset) if requested.  */
25424 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25426 emit_move_insn (copy_reg, stack_reg);
25429 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25430 it now and set the alias set/attributes. The above gen_*_update
25431 calls will generate a PARALLEL with the MEM set being the first
25433 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25437 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25439 #if PROBE_INTERVAL > 32768
25440 #error Cannot use indexed addressing mode for stack probing
25443 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25444 inclusive. These are offsets from the current stack pointer. */
/* -fstack-check style (Ada) probing.  NOTE(review): return type, braces
   and some lines are missing from this excerpt.  */
25447 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25449 /* See if we have a constant small number of probes to generate. If so,
25450 that's the easy case. */
25451 if (first + size <= 32768)
25455 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25456 it exceeds SIZE. If only one probe is needed, this will not
25457 generate any code. Then probe at FIRST + SIZE. */
25458 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25459 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25462 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25466 /* Otherwise, do the same as above, but in a loop. Note that we must be
25467 extra careful with variables wrapping around because we might be at
25468 the very top (or the very bottom) of the address space and we have
25469 to be able to handle this case properly; in particular, we use an
25470 equality test for the loop condition. */
25473 HOST_WIDE_INT rounded_size;
25474 rtx r12 = gen_rtx_REG (Pmode, 12);
25475 rtx r0 = gen_rtx_REG (Pmode, 0);
25477 /* Sanity check for the addressing mode we're going to use. */
25478 gcc_assert (first <= 32768);
25480 /* Step 1: round SIZE to the previous multiple of the interval. */
25482 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25485 /* Step 2: compute initial and final value of the loop counter. */
25487 /* TEST_ADDR = SP + FIRST. */
25488 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25491 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25492 if (rounded_size > 32768)
25494 emit_move_insn (r0, GEN_INT (-rounded_size));
25495 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25498 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25502 /* Step 3: the loop
25506 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25509 while (TEST_ADDR != LAST_ADDR)
25511 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25512 until it is equal to ROUNDED_SIZE. */
25515 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25517 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25520 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25521 that SIZE is equal to ROUNDED_SIZE. */
25523 if (size != rounded_size)
25524 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
25528 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25529 addresses, not offsets. */
/* Emits the assembly for the Ada-style probe loop: decrement REG1 by
   PROBE_INTERVAL, store r0 at the new address, loop until REG1 == REG2.
   NOTE(review): some lines (xops setup, final return) are missing.  */
25531 static const char *
25532 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25534 static int labelno = 0;
25538 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25541 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25543 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25545 xops[1] = GEN_INT (-PROBE_INTERVAL);
25546 output_asm_insn ("addi %0,%0,%1", xops);
25548 /* Probe at TEST_ADDR. */
25549 xops[1] = gen_rtx_REG (Pmode, 0);
25550 output_asm_insn ("stw %1,0(%0)", xops);
25552 /* Test if TEST_ADDR == LAST_ADDR. */
/* cmpd for 64-bit, cmpw for 32-bit.  */
25555 output_asm_insn ("cmpd 0,%0,%1", xops);
25557 output_asm_insn ("cmpw 0,%0,%1", xops);
25560 fputs ("\tbne 0,", asm_out_file);
25561 assemble_name_raw (asm_out_file, loop_lab);
25562 fputc ('\n', asm_out_file);
25567 /* This function is called when rs6000_frame_related is processing
25568 SETs within a PARALLEL, and returns whether the REGNO save ought to
25569 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25570 for out-of-line register save functions, store multiple, and the
25571 Darwin world_save. They may contain registers that don't really
/* NOTE(review): the return type line and the r0/CR2 early returns are
   not visible in this excerpt.  */
25575 interesting_frame_related_regno (unsigned int regno)
25577 /* Saves apparently of r0 are actually saving LR. It doesn't make
25578 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25579 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25580 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25581 as frame related. */
25584 /* If we see CR2 then we are here on a Darwin world save. Saves of
25585 CR2 signify the whole CR is being saved. This is a long-standing
25586 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
25587 that CR needs to be saved. */
25588 if (regno == CR2_REGNO)
25590 /* Omit frame info for any user-defined global regs. If frame info
25591 is supplied for them, frame unwinding will restore a user reg.
25592 Also omit frame info for any reg we don't need to save, as that
25593 bloats frame info and can cause problems with shrink wrapping.
25594 Since global regs won't be seen as needing to be saved, both of
25595 these conditions are covered by save_reg_p. */
25596 return save_reg_p (regno);
25599 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25600 addresses, not offsets.
25602 REG2 contains the backchain that must be stored into *sp at each allocation.
25604 This is subtly different than the Ada probing above in that it tries hard
25605 to prevent attacks that jump the stack guard. Thus, it is never allowed
25606 to allocate more than PROBE_INTERVAL bytes of stack space without a
/* Allocation and probe happen in a single stdu/stwu per iteration, so
   the guard can never be jumped.  NOTE(review): xops setup lines and
   the final return are not visible in this excerpt.  */
25609 static const char *
25610 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25612 static int labelno = 0;
25616 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25618 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25620 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25622 /* This allocates and probes. */
25625 xops[2] = GEN_INT (-probe_interval);
/* stdu for 64-bit, stwu for 32-bit: store backchain and update r1.  */
25627 output_asm_insn ("stdu %1,%2(%0)", xops);
25629 output_asm_insn ("stwu %1,%2(%0)", xops);
25631 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25635 output_asm_insn ("cmpd 0,%0,%1", xops);
25637 output_asm_insn ("cmpw 0,%0,%1", xops);
25639 fputs ("\tbne 0,", asm_out_file);
25640 assemble_name_raw (asm_out_file, loop_lab);
25641 fputc ('\n', asm_out_file);
25646 /* Wrapper around the output_probe_stack_range routines. */
/* Dispatch to the stack-clash variant when -fstack-clash-protection is
   on, otherwise to the plain (Ada-style) probe loop.  */
25648 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
25650 if (flag_stack_clash_protection)
25651 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
25653 return output_probe_stack_range_1 (reg1, reg3);
25656 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25657 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25658 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25659 deduce these equivalences by itself so it wasn't necessary to hold
25660 its hand so much. Don't be tempted to always supply d2_f_d_e with
25661 the actual cfa register, ie. r31 when we are using a hard frame
25662 pointer. That fails when saving regs off r1, and sched moves the
25663 r31 setup past the reg saves. */
/* Returns INSN (per callers such as emit_frame_save).  NOTE(review):
   return type, braces and some lines are missing from this excerpt.  */
25666 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25667 rtx reg2, rtx repl2)
/* When REG is r1 itself no substitution is needed (VAL must be 0).  */
25671 if (REGNO (reg) == STACK_POINTER_REGNUM)
25673 gcc_checking_assert (val == 0);
25677 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25680 rtx pat = PATTERN (insn);
25681 if (!repl && !reg2)
25683 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25684 if (GET_CODE (pat) == PARALLEL)
25685 for (int i = 0; i < XVECLEN (pat, 0); i++)
25686 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25688 rtx set = XVECEXP (pat, 0, i);
/* Only register saves worth unwinding get marked; see
   interesting_frame_related_regno.  */
25690 if (!REG_P (SET_SRC (set))
25691 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25692 RTX_FRAME_RELATED_P (set) = 1;
25694 RTX_FRAME_RELATED_P (insn) = 1;
25698 /* We expect that 'pat' is either a SET or a PARALLEL containing
25699 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25700 are important so they all have to be marked RTX_FRAME_RELATED_P.
25701 Call simplify_replace_rtx on the SETs rather than the whole insn
25702 so as to leave the other stuff alone (for example USE of r12). */
25704 set_used_flags (pat);
25705 if (GET_CODE (pat) == SET)
25708 pat = simplify_replace_rtx (pat, reg, repl);
25710 pat = simplify_replace_rtx (pat, reg2, repl2);
25712 else if (GET_CODE (pat) == PARALLEL)
/* Copy the PARALLEL shell so the note does not share structure with
   the insn pattern.  */
25714 pat = shallow_copy_rtx (pat);
25715 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25717 for (int i = 0; i < XVECLEN (pat, 0); i++)
25718 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25720 rtx set = XVECEXP (pat, 0, i);
25723 set = simplify_replace_rtx (set, reg, repl);
25725 set = simplify_replace_rtx (set, reg2, repl2);
25726 XVECEXP (pat, 0, i) = set;
25728 if (!REG_P (SET_SRC (set))
25729 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25730 RTX_FRAME_RELATED_P (set) = 1;
25734 gcc_unreachable ();
25736 RTX_FRAME_RELATED_P (insn) = 1;
25737 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25742 /* Returns an insn that has a vrsave set operation with the
25743 appropriate CLOBBERs. */
/* REG holds the new VRSAVE value; INFO gives the vrsave_mask;
   EPILOGUEP is nonzero when emitting the epilogue.  NOTE(review):
   return type, braces and some lines are missing from this excerpt.  */
25746 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25749 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25750 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
/* The volatile unspec prevents the VRSAVE update from being moved.  */
25753 = gen_rtx_SET (vrsave,
25754 gen_rtx_UNSPEC_VOLATILE (SImode,
25755 gen_rtvec (2, reg, vrsave),
25756 UNSPECV_SET_VRSAVE));
25760 /* We need to clobber the registers in the mask so the scheduler
25761 does not move sets to VRSAVE before sets of AltiVec registers.
25763 However, if the function receives nonlocal gotos, reload will set
25764 all call saved registers live. We will end up with:
25766 (set (reg 999) (mem))
25767 (parallel [ (set (reg vrsave) (unspec blah))
25768 (clobber (reg 999))])
25770 The clobber will cause the store into reg 999 to be dead, and
25771 flow will attempt to delete an epilogue insn. In this case, we
25772 need an unspec use/set of the register. */
25774 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25775 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25777 if (!epiloguep || call_used_regs [i])
25778 clobs[nclobs++] = gen_hard_reg_clobber (V4SImode, i);
/* Call-saved reg in the epilogue: emit the unspec self-set instead
   of a clobber, per the comment above.  */
25781 rtx reg = gen_rtx_REG (V4SImode, i);
25784 = gen_rtx_SET (reg,
25785 gen_rtx_UNSPEC (V4SImode,
25786 gen_rtvec (1, reg), 27));
/* Wrap the VRSAVE set and all clobbers into one PARALLEL insn.  */
25790 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25792 for (i = 0; i < nclobs; ++i)
25793 XVECEXP (insn, 0, i) = clobs[i];
/* Build a (set) rtx moving REG to or from the frame slot at
   FRAME_REG + OFFSET: a store when STORE is true, a load otherwise.  */
25799 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25803 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25804 mem = gen_frame_mem (GET_MODE (reg), addr);
25805 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
/* Build a SET loading REG from the frame slot at FRAME_REG + OFFSET.  */
25809 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25811 return gen_frame_set (reg, frame_reg, offset, false);
/* Build a SET storing REG into the frame slot at FRAME_REG + OFFSET.  */
25815 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25817 return gen_frame_set (reg, frame_reg, offset, true);
25820 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25821 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
/* FRAME_REG_TO_SP is the constant difference FRAME_REG - r1, passed to
   rs6000_frame_related so the CFI note is expressed relative to r1.
   NOTE(review): return type and braces are missing from this excerpt.  */
25824 emit_frame_save (rtx frame_reg, machine_mode mode,
25825 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25829 /* Some cases that need register indexed addressing. */
25830 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25831 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
25833 reg = gen_rtx_REG (mode, regno);
25834 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25835 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25836 NULL_RTX, NULL_RTX);
25839 /* Emit an offset memory reference suitable for a frame store, while
25840 converting to a valid addressing mode. */
/* Returns a frame MEM in MODE at REG + OFFSET.  */
25843 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25845 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
/* Default for targets that do not define fix-and-continue support.
   NOTE(review): the matching #endif is on a line elided from this extract.  */
25848 #ifndef TARGET_FIX_AND_CONTINUE
25849 #define TARGET_FIX_AND_CONTINUE 0
25852 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
25853 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25854 #define LAST_SAVRES_REGISTER 31
25855 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
/* Cache of SYMBOL_REFs for the out-of-line save/restore routines, indexed
   by (first saved register - FIRST_SAVRES_REGISTER) and the SAVRES_* flag
   selector (0..11, 12 slots).  */
25866 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
25868 /* Temporary holding space for an out-of-line register save/restore
/* Static scratch buffer returned by rs6000_savres_routine_name; callers
   must copy the name before the next call.  */
25870 static char savres_routine_name[30];
25872 /* Return the name for an out-of-line register save/restore routine.
25873 We are saving/restoring GPRs if GPR is true. */
/* SEL is a mask of SAVRES_* flags: SAVRES_REG selects the register class
   (GPR/FPR/VR), SAVRES_SAVE distinguishes save from restore, and SAVRES_LR
   requests the variant that also handles the link register.  The result
   points into the static buffer savres_routine_name.  */
25876 rs6000_savres_routine_name (int regno, int sel)
25878   const char *prefix = "";
25879   const char *suffix = "";
25881   /* Different targets are supposed to define
25882 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25883 routine name could be defined with:
25885 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25887 This is a nice idea in practice, but in reality, things are
25888 complicated in several ways:
25890 - ELF targets have save/restore routines for GPRs.
25892 - PPC64 ELF targets have routines for save/restore of GPRs that
25893 differ in what they do with the link register, so having a set
25894 prefix doesn't work. (We only use one of the save routines at
25895 the moment, though.)
25897 - PPC32 elf targets have "exit" versions of the restore routines
25898 that restore the link register and can save some extra space.
25899 These require an extra suffix. (There are also "tail" versions
25900 of the restore routines and "GOT" versions of the save routines,
25901 but we don't generate those at present. Same problems apply,
25904 We deal with all this by synthesizing our own prefix/suffix and
25905 using that for the simple sprintf call shown above. */
25906   if (DEFAULT_ABI == ABI_V4)
/* 32-bit SVR4: plain _savegpr_/_restgpr_ style names.  */
25911       if ((sel & SAVRES_REG) == SAVRES_GPR)
25912 	prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25913       else if ((sel & SAVRES_REG) == SAVRES_FPR)
25914 	prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25915       else if ((sel & SAVRES_REG) == SAVRES_VR)
25916 	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
/* NOTE(review): the body of this LR case is on elided lines; presumably it
   appends the "_x" exit suffix -- confirm against the full source.  */
25920       if ((sel & SAVRES_LR))
25923   else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25925 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25926       /* No out-of-line save/restore routines for GPRs on AIX. */
25927       gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
/* 64-bit: the gpr0/gpr1 variants differ in link-register handling.  */
25931       if ((sel & SAVRES_REG) == SAVRES_GPR)
25932 	prefix = ((sel & SAVRES_SAVE)
25933 		  ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25934 		  : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25935       else if ((sel & SAVRES_REG) == SAVRES_FPR)
25937 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25938 	  if ((sel & SAVRES_LR))
25939 	    prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
/* Otherwise fall back to the target-provided FP prefix/suffix macros.  */
25943 	      prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25944 	      suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25947       else if ((sel & SAVRES_REG) == SAVRES_VR)
25948 	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25953   if (DEFAULT_ABI == ABI_DARWIN)
25955       /* The Darwin approach is (slightly) different, in order to be
25956 compatible with code generated by the system toolchain. There is a
25957 single symbol for the start of save sequence, and the code here
25958 embeds an offset into that code on the basis of the first register
/* The "*" prefix tells the assembler printer to emit the name verbatim;
   the "%.0d" suppresses a zero offset for the first saved register.  */
25960       prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25961       if ((sel & SAVRES_REG) == SAVRES_GPR)
25962 	sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25963 		 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25964 		 (regno - 13) * 4, prefix, regno);
25965       else if ((sel & SAVRES_REG) == SAVRES_FPR)
25966 	sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25967 		 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25968       else if ((sel & SAVRES_REG) == SAVRES_VR)
25969 	sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25970 		 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
/* Non-Darwin: compose the name from the synthesized prefix/suffix.  */
25975     sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25977   return savres_routine_name;
25980 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25981 We are saving/restoring GPRs if GPR is true. */
/* Results are cached in savres_routine_syms so each routine gets exactly
   one SYMBOL_REF per (first-register, selector) pair.  */
25984 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
/* Normalize the first saved register of the selected class into the
   0-based GPR-style numbering used by the cache and routine names.  */
25986   int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25987 	       ? info->first_gp_reg_save
25988 	       : (sel & SAVRES_REG) == SAVRES_FPR
25989 	       ? info->first_fp_reg_save - 32
25990 	       : (sel & SAVRES_REG) == SAVRES_VR
25991 	       ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25996   /* Don't generate bogus routine names. */
/* NOTE(review): `select` is presumably a local copy of SEL declared on a
   line elided from this extract -- confirm against the full source.  */
25997   gcc_assert (FIRST_SAVRES_REGISTER <= regno
25998 	      && regno <= LAST_SAVRES_REGISTER
25999 	      && select >= 0 && select <= 12);
26001   sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
/* Cache miss: build the name, intern it, and remember the SYMBOL_REF.  */
26007       name = rs6000_savres_routine_name (regno, sel);
26009       sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26010 	= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26011       SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26017 /* Emit a sequence of insns, including a stack tie if needed, for
26018 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26019 reset the stack pointer, but move the base of the frame into
26020 reg UPDT_REGNO for use by out-of-line register restore routines. */
/* FRAME_REG_RTX + FRAME_OFF is the address of the frame base; the result,
   when an insn is emitted, is that insn.  */
26023 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26024 			 unsigned updt_regno)
26026   /* If there is nothing to do, don't do anything. */
26027   if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26030   rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26032   /* This blockage is needed so that sched doesn't decide to move
26033 the sp change before the register restores. */
26034   if (DEFAULT_ABI == ABI_V4)
26035     return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26036 					     GEN_INT (frame_off)));
26038   /* If we are restoring registers out-of-line, we will be using the
26039 "exit" variants of the restore routines, which will reset the
26040 stack for us. But we do need to point updt_reg into the
26041 right place for those routines. */
26042   if (frame_off != 0)
26043     return emit_insn (gen_add3_insn (updt_reg_rtx,
26044 				     frame_reg_rtx, GEN_INT (frame_off)));
/* Zero offset: a plain register copy suffices.  */
26046   return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26051 /* Return the register number used as a pointer by out-of-line
26052 save/restore functions. */
/* The answer depends on ABI and on which class of routine SEL selects:
   r1 (the stack pointer itself), r11, or r12.  */
26054 static inline unsigned
26055 ptr_regno_for_savres (int sel)
26057   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26058     return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26059   return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26062 /* Construct a parallel rtx describing the effect of a call to an
26063 out-of-line register save/restore routine, and emit the insn
26064 or jump_insn as appropriate. */
/* SAVE_AREA_OFFSET and LR_OFFSET are offsets from FRAME_REG_RTX; SEL is a
   mask of SAVRES_* flags selecting register class, save vs. restore, and
   whether the routine handles the link register.  */
26067 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26068 			rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26069 			machine_mode reg_mode, int sel)
26072   int offset, start_reg, end_reg, n_regs, use_reg;
26073   int reg_size = GET_MODE_SIZE (reg_mode);
/* [start_reg, end_reg) is the half-open range of hard registers the
   routine saves or restores for the selected class.  */
26080   start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26081 	       ? info->first_gp_reg_save
26082 	       : (sel & SAVRES_REG) == SAVRES_FPR
26083 	       ? info->first_fp_reg_save
26084 	       : (sel & SAVRES_REG) == SAVRES_VR
26085 	       ? info->first_altivec_reg_save
26087   end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26089 	     : (sel & SAVRES_REG) == SAVRES_FPR
26091 	     : (sel & SAVRES_REG) == SAVRES_VR
26092 	     ? LAST_ALTIVEC_REGNO + 1
26094   n_regs = end_reg - start_reg;
/* Room for the per-register SETs plus: USE of the routine symbol, LR
   clobber, pointer-reg use/clobber, optional return / LR save element.  */
26095   p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26096 		   + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
/* A restore routine that also restores LR acts as the function return.  */
26099   if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26100     RTVEC_ELT (p, offset++) = ret_rtx;
26102   RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, LR_REGNO);
26104   sym = rs6000_savres_routine_sym (info, sel);
26105   RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26107   use_reg = ptr_regno_for_savres (sel);
26108   if ((sel & SAVRES_REG) == SAVRES_VR)
26110       /* Vector regs are saved/restored using [reg+reg] addressing. */
26111       RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, use_reg);
26112       RTVEC_ELT (p, offset++)
26113 	= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
/* Non-vector routines just consume the pointer register.  */
26116     RTVEC_ELT (p, offset++)
26117       = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
/* One SET per saved/restored register, at consecutive frame offsets.  */
26119   for (i = 0; i < end_reg - start_reg; i++)
26120     RTVEC_ELT (p, i + offset)
26121       = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26122 		       frame_reg_rtx, save_area_offset + reg_size * i,
26123 		       (sel & SAVRES_SAVE) != 0);
/* The LR-saving save routines store LR (passed in r0) as a side effect.  */
26125   if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26126     RTVEC_ELT (p, i + offset)
26127       = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26129   par = gen_rtx_PARALLEL (VOIDmode, p);
/* A returning restore must be emitted as a jump insn.  */
26131   if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26133       insn = emit_jump_insn (par);
26134       JUMP_LABEL (insn) = ret_rtx;
26137     insn = emit_insn (par);
26141 /* Emit prologue code to store CR fields that need to be saved into REG. This
26142 function should only be called when moving the non-volatile CRs to REG, it
26143 is not a general purpose routine to move the entire set of CRs to REG.
26144 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
26148 rs6000_emit_prologue_move_from_cr (rtx reg)
26150   /* Only the ELFv2 ABI allows storing only selected fields. */
26151   if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26153       int i, cr_reg[8], count = 0;
26155       /* Collect CR fields that must be saved. */
26156       for (i = 0; i < 8; i++)
26157 	if (save_reg_p (CR0_REGNO + i))
26158 	  cr_reg[count++] = i;
26160       /* If it's just a single one, use mfcrf. */
/* Build (set reg (unspec [cr-field mask] UNSPEC_MOVESI_FROM_CR)); the mask
   bit position mirrors mfcrf's big-endian field numbering.  */
26163 	  rtvec p = rtvec_alloc (1);
26164 	  rtvec r = rtvec_alloc (2);
26165 	  RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26166 	  RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26168 	    = gen_rtx_SET (reg,
26169 			   gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26171 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26175       /* ??? It might be better to handle count == 2 / 3 cases here
26176 as well, using logical operations to combine the values. */
/* Fallback: move the whole condition register via mfcr.  */
26179   emit_insn (gen_prologue_movesi_from_cr (reg));
26182 /* Return whether the split-stack arg pointer (r12) is used. */
26185 split_stack_arg_pointer_used_p (void)
26187   /* If the pseudo holding the arg pointer is no longer a pseudo,
26188 then the arg pointer is used. */
26189   if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26190       && (!REG_P (cfun->machine->split_stack_arg_pointer)
26191 	  || HARD_REGISTER_P (cfun->machine->split_stack_arg_pointer)))
26194   /* Unfortunately we also need to do some code scanning, since
26195 r12 may have been substituted for the pseudo. */
/* Scan only the first real basic block; r12 is set up in the prologue, so
   any surviving use must be reachable from there.  */
26197   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26198   FOR_BB_INSNS (bb, insn)
26199     if (NONDEBUG_INSN_P (insn))
26201 	/* A call destroys r12. */
/* Any dataflow use of r12 means the arg pointer is live here.  */
26206 	FOR_EACH_INSN_USE (use, insn)
26208 	    rtx x = DF_REF_REG (use);
26209 	    if (REG_P (x) && REGNO (x) == 12)
/* A def of r12 kills it, so the prologue value is not needed past here.  */
26213 	FOR_EACH_INSN_DEF (def, insn)
26215 	    rtx x = DF_REF_REG (def);
26216 	    if (REG_P (x) && REGNO (x) == 12)
/* Neither used nor clobbered in this block: r12 matters iff it is
   live-out of the block.  */
26220   return bitmap_bit_p (DF_LR_OUT (bb), 12);
26223 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26226 rs6000_global_entry_point_needed_p (void)
26228   /* Only needed for the ELFv2 ABI. */
26229   if (DEFAULT_ABI != ABI_ELFv2)
26232   /* With -msingle-pic-base, we assume the whole program shares the same
26233 TOC, so no global entry point prologues are needed anywhere. */
26234   if (TARGET_SINGLE_PIC_BASE)
26237   /* Ensure we have a global entry point for thunks. ??? We could
26238 avoid that if the target routine doesn't need a global entry point,
26239 but we do not know whether this is the case at this point. */
26240   if (cfun->is_thunk)
26243   /* For regular functions, rs6000_emit_prologue sets this flag if the
26244 routine ever uses the TOC pointer. */
26245   return cfun->machine->r2_setup_needed;
26248 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
/* Build the sbitmap of frame components eligible for separate
   shrink-wrapping.  Component numbering (see comment below): 0 = LR,
   2 = TOC save, 13..31 = GPRs, 46..63 = FPRs (regno 32 offset 14).  */
26250 rs6000_get_separate_components (void)
26252   rs6000_stack_t *info = rs6000_stack_info ();
/* World saves handle everything in one blob; nothing is separable.  */
26254   if (WORLD_SAVE_P (info))
/* Separate wrapping is incompatible with store/load-multiple saves.  */
26257   gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26258 	      && !(info->savres_strategy & REST_MULTIPLE))
26260   /* Component 0 is the save/restore of LR (done via GPR0).
26261 Component 2 is the save of the TOC (GPR2).
26262 Components 13..31 are the save/restore of GPR13..GPR31.
26263 Components 46..63 are the save/restore of FPR14..FPR31. */
26265   cfun->machine->n_components = 64;
26267   sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26268   bitmap_clear (components);
26270   int reg_size = TARGET_32BIT ? 4 : 8;
26271   int fp_reg_size = 8;
26273   /* The GPRs we need saved to the frame. */
/* Only inline saves/restores can be split out; out-of-line routines
   save all registers at once.  */
26274   if ((info->savres_strategy & SAVE_INLINE_GPRS)
26275       && (info->savres_strategy & REST_INLINE_GPRS))
26277       int offset = info->gp_save_offset;
26279 	offset += info->total_size;
26281       for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
/* Each slot must be addressable with a 16-bit signed displacement.  */
26283 	  if (IN_RANGE (offset, -0x8000, 0x7fff)
26284 	      && save_reg_p (regno))
26285 	    bitmap_set_bit (components, regno);
26287 	  offset += reg_size;
26291   /* Don't mess with the hard frame pointer. */
26292   if (frame_pointer_needed)
26293     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26295   /* Don't mess with the fixed TOC register. */
26296   if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26297       || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26298       || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26299     bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26301   /* The FPRs we need saved to the frame. */
26302   if ((info->savres_strategy & SAVE_INLINE_FPRS)
26303       && (info->savres_strategy & REST_INLINE_FPRS))
26305       int offset = info->fp_save_offset;
26307 	offset += info->total_size;
26309       for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26311 	  if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26312 	    bitmap_set_bit (components, regno);
26314 	  offset += fp_reg_size;
26318   /* Optimize LR save and restore if we can. This is component 0. Any
26319 out-of-line register save/restore routines need LR. */
26320   if (info->lr_save_p
26321       && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26322       && (info->savres_strategy & SAVE_INLINE_GPRS)
26323       && (info->savres_strategy & REST_INLINE_GPRS)
26324       && (info->savres_strategy & SAVE_INLINE_FPRS)
26325       && (info->savres_strategy & REST_INLINE_FPRS)
26326       && (info->savres_strategy & SAVE_INLINE_VRS)
26327       && (info->savres_strategy & REST_INLINE_VRS))
26329       int offset = info->lr_save_offset;
26331 	offset += info->total_size;
26332       if (IN_RANGE (offset, -0x8000, 0x7fff))
26333 	bitmap_set_bit (components, 0);
26336   /* Optimize saving the TOC. This is component 2. */
26337   if (cfun->machine->save_toc_in_prologue)
26338     bitmap_set_bit (components, 2);
26343 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
/* Return the set of components (same numbering as
   rs6000_get_separate_components) that BB needs: a component is needed
   if its register appears in BB's LIVE in, gen, or kill sets.  */
26345 rs6000_components_for_bb (basic_block bb)
26347   rs6000_stack_t *info = rs6000_stack_info ();
26349   bitmap in = DF_LIVE_IN (bb);
26350   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26351   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26353   sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26354   bitmap_clear (components);
26356   /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
/* GPR components are numbered by hard regno directly.  */
26359   for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26360     if (bitmap_bit_p (in, regno)
26361 	|| bitmap_bit_p (gen, regno)
26362 	|| bitmap_bit_p (kill, regno))
26363       bitmap_set_bit (components, regno)
/* Likewise FPR components (hard regnos 32..63).  */
26366   for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26367     if (bitmap_bit_p (in, regno)
26368 	|| bitmap_bit_p (gen, regno)
26369 	|| bitmap_bit_p (kill, regno))
26370       bitmap_set_bit (components, regno)
26372   /* The link register. */
26373   if (bitmap_bit_p (in, LR_REGNO)
26374       || bitmap_bit_p (gen, LR_REGNO)
26375       || bitmap_bit_p (kill, LR_REGNO))
26376     bitmap_set_bit (components, 0)
26378   /* The TOC save. */
26379   if (bitmap_bit_p (in, TOC_REGNUM)
26380       || bitmap_bit_p (gen, TOC_REGNUM)
26381       || bitmap_bit_p (kill, TOC_REGNUM))
26382     bitmap_set_bit (components, 2)
26387 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
/* Remove from COMPONENTS those that cannot be placed on edge E.  */
26389 rs6000_disqualify_components (sbitmap components, edge e,
26390 			      sbitmap edge_components, bool /*is_prologue*/)
26392   /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26393 live where we want to place that code. */
26394   if (bitmap_bit_p (edge_components, 0)
26395       && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
/* Log the disqualification when dumping is enabled.  */
26398 	fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26399 		 "on entry to bb %d\n", e->dest->index);
26400       bitmap_clear_bit (components, 0);
26404 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
/* Emit the prologue (save) code for each component set in COMPONENTS,
   with RTX_FRAME_RELATED_P and CFA notes for the unwinder.  */
26406 rs6000_emit_prologue_components (sbitmap components)
26408   rs6000_stack_t *info = rs6000_stack_info ();
/* Saves are addressed from the frame pointer when one is in use,
   otherwise from the stack pointer.  */
26409   rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26410 			     ? HARD_FRAME_POINTER_REGNUM
26411 			     : STACK_POINTER_REGNUM);
26413   machine_mode reg_mode = Pmode;
26414   int reg_size = TARGET_32BIT ? 4 : 8;
26415   machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26416   int fp_reg_size = 8;
26418   /* Prologue for LR. */
26419   if (bitmap_bit_p (components, 0))
/* Move LR through GPR0, then store GPR0 into the LR save slot; the
   REG_CFA_REGISTER/REG_CFA_OFFSET notes track LR's location.  */
26421       rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26422       rtx reg = gen_rtx_REG (reg_mode, 0);
26423       rtx_insn *insn = emit_move_insn (reg, lr);
26424       RTX_FRAME_RELATED_P (insn) = 1;
26425       add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (reg, lr));
26427       int offset = info->lr_save_offset;
26429 	offset += info->total_size;
26431       insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26432       RTX_FRAME_RELATED_P (insn) = 1;
26433       rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26434       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26437   /* Prologue for TOC. */
26438   if (bitmap_bit_p (components, 2))
/* The TOC save slot is always relative to the stack pointer.  */
26440       rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26441       rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26442       emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26445   /* Prologue for the GPRs. */
26446   int offset = info->gp_save_offset;
26448     offset += info->total_size;
26450   for (int i = info->first_gp_reg_save; i < 32; i++)
26452       if (bitmap_bit_p (components, i))
26454 	  rtx reg = gen_rtx_REG (reg_mode, i);
26455 	  rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26456 	  RTX_FRAME_RELATED_P (insn) = 1;
26457 	  rtx set = copy_rtx (single_set (insn));
26458 	  add_reg_note (insn, REG_CFA_OFFSET, set);
/* Advance even for unsaved registers: slots are laid out densely.  */
26461       offset += reg_size;
26464   /* Prologue for the FPRs. */
26465   offset = info->fp_save_offset;
26467     offset += info->total_size;
26469   for (int i = info->first_fp_reg_save; i < 64; i++)
26471       if (bitmap_bit_p (components, i))
26473 	  rtx reg = gen_rtx_REG (fp_reg_mode, i);
26474 	  rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26475 	  RTX_FRAME_RELATED_P (insn) = 1;
26476 	  rtx set = copy_rtx (single_set (insn));
26477 	  add_reg_note (insn, REG_CFA_OFFSET, set);
26480       offset += fp_reg_size;
26484 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
/* Emit the epilogue (restore) code for each component in COMPONENTS, in
   the reverse order of the prologue: FPRs, GPRs, then LR.  Each restore
   carries a REG_CFA_RESTORE note.  */
26486 rs6000_emit_epilogue_components (sbitmap components)
26488   rs6000_stack_t *info = rs6000_stack_info ();
26489   rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26490 			     ? HARD_FRAME_POINTER_REGNUM
26491 			     : STACK_POINTER_REGNUM);
26493   machine_mode reg_mode = Pmode;
26494   int reg_size = TARGET_32BIT ? 4 : 8;
26496   machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26497   int fp_reg_size = 8;
26499   /* Epilogue for the FPRs. */
26500   int offset = info->fp_save_offset;
26502     offset += info->total_size;
26504   for (int i = info->first_fp_reg_save; i < 64; i++)
26506       if (bitmap_bit_p (components, i))
26508 	  rtx reg = gen_rtx_REG (fp_reg_mode, i);
26509 	  rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26510 	  RTX_FRAME_RELATED_P (insn) = 1;
26511 	  add_reg_note (insn, REG_CFA_RESTORE, reg);
/* Slots are dense; step past unsaved registers too.  */
26514       offset += fp_reg_size;
26517   /* Epilogue for the GPRs. */
26518   offset = info->gp_save_offset;
26520     offset += info->total_size;
26522   for (int i = info->first_gp_reg_save; i < 32; i++)
26524       if (bitmap_bit_p (components, i))
26526 	  rtx reg = gen_rtx_REG (reg_mode, i);
26527 	  rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26528 	  RTX_FRAME_RELATED_P (insn) = 1;
26529 	  add_reg_note (insn, REG_CFA_RESTORE, reg);
26532       offset += reg_size;
26535   /* Epilogue for LR. */
26536   if (bitmap_bit_p (components, 0))
26538       int offset = info->lr_save_offset;
26540 	offset += info->total_size;
/* Load the saved LR value into GPR0, then move it into LR.  */
26542       rtx reg = gen_rtx_REG (reg_mode, 0);
26543       rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26545       rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26546       insn = emit_move_insn (lr, reg);
26547       RTX_FRAME_RELATED_P (insn) = 1;
26548       add_reg_note (insn, REG_CFA_RESTORE, lr);
26552 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
/* Record in cfun->machine which components are being shrink-wrapped
   separately so rs6000_emit_prologue/epilogue skip them.  */
26554 rs6000_set_handled_components (sbitmap components)
26556   rs6000_stack_t *info = rs6000_stack_info ();
26558   for (int i = info->first_gp_reg_save; i < 32; i++)
26559     if (bitmap_bit_p (components, i))
26560       cfun->machine->gpr_is_wrapped_separately[i] = true;
/* FPR flags are indexed from 0, so shift the hard regno down by 32.  */
26562   for (int i = info->first_fp_reg_save; i < 64; i++)
26563     if (bitmap_bit_p (components, i))
26564       cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26566   if (bitmap_bit_p (components, 0))
26567     cfun->machine->lr_is_wrapped_separately = true;
26569   if (bitmap_bit_p (components, 2))
26570     cfun->machine->toc_is_wrapped_separately = true;
26573 /* VRSAVE is a bit vector representing which AltiVec registers
26574 are used. The OS uses this to determine which vector
26575 registers to save on a context switch. We need to save
26576 VRSAVE on the stack frame, add whatever AltiVec registers we
26577 used in this function, and do the corresponding magic in the
/* SAVE_REGNO is a free GPR used as scratch; FRAME_OFF/FRAME_REG_RTX give
   the frame base the VRSAVE slot is addressed from.  */
26580 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26581 		      HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26583   /* Get VRSAVE into a GPR. */
26584   rtx reg = gen_rtx_REG (SImode, save_regno);
26585   rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
/* Two read paths -- the selecting condition is on an elided line;
   presumably it chooses the Darwin/special pattern vs. a plain move.  */
26587     emit_insn (gen_get_vrsave_internal (reg));
26589     emit_insn (gen_rtx_SET (reg, vrsave));
/* Save the caller's VRSAVE value in its frame slot.  */
26592   int offset = info->vrsave_save_offset + frame_off;
26593   emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26595   /* Include the registers in the mask. */
26596   emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
/* Write the updated mask back into VRSAVE.  */
26598   emit_insn (generate_set_vrsave (reg, info, 0));
26601 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26602 called, it left the arg pointer to the old stack in r29. Otherwise, the
26603 arg pointer is the top of the current frame. */
26605 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26606 			   HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26608   cfun->machine->split_stack_argp_used = true;
/* First branch (condition elided): when we still have the stack-adjust
   insn, capture the pre-adjust SP into r12 just before it.  */
26612       rtx r12 = gen_rtx_REG (Pmode, 12);
26613       rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26614       rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26615       emit_insn_before (set_r12, sp_adjust);
/* Otherwise compute the frame top into r12 from the frame register.  */
26617   else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26619       rtx r12 = gen_rtx_REG (Pmode, 12);
26620       if (frame_off == 0)
26621 	emit_move_insn (r12, frame_reg_rtx);
26623 	emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
/* Final part (guard elided): if __morestack ran, it left the old-stack
   arg pointer in r29; conditionally (on cr7, set by the split-stack
   check) replace r12 with r29.  */
26628       rtx r12 = gen_rtx_REG (Pmode, 12);
26629       rtx r29 = gen_rtx_REG (Pmode, 29);
26630       rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26631       rtx not_more = gen_label_rtx ();
/* Branch over the r29 copy when cr7 says __morestack was not called.  */
26634       jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26635 				   gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26636 				   gen_rtx_LABEL_REF (VOIDmode, not_more),
26638       jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26639       JUMP_LABEL (jump) = not_more;
26640       LABEL_NUSES (not_more) += 1;
26641       emit_move_insn (r12, r29);
26642       emit_label (not_more);
26646 /* Emit function prologue as insns. */
26649 rs6000_emit_prologue (void)
26651 rs6000_stack_t *info = rs6000_stack_info ();
26652 machine_mode reg_mode = Pmode;
26653 int reg_size = TARGET_32BIT ? 4 : 8;
26654 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26655 int fp_reg_size = 8;
26656 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26657 rtx frame_reg_rtx = sp_reg_rtx;
26658 unsigned int cr_save_regno;
26659 rtx cr_save_rtx = NULL_RTX;
26662 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26663 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26664 && call_used_regs[STATIC_CHAIN_REGNUM]);
26665 int using_split_stack = (flag_split_stack
26666 && (lookup_attribute ("no_split_stack",
26667 DECL_ATTRIBUTES (cfun->decl))
26670 /* Offset to top of frame for frame_reg and sp respectively. */
26671 HOST_WIDE_INT frame_off = 0;
26672 HOST_WIDE_INT sp_off = 0;
26673 /* sp_adjust is the stack adjusting instruction, tracked so that the
26674 insn setting up the split-stack arg pointer can be emitted just
26675 prior to it, when r12 is not used here for other purposes. */
26676 rtx_insn *sp_adjust = 0;
26679 /* Track and check usage of r0, r11, r12. */
26680 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26681 #define START_USE(R) do \
26683 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26684 reg_inuse |= 1 << (R); \
26686 #define END_USE(R) do \
26688 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26689 reg_inuse &= ~(1 << (R)); \
26691 #define NOT_INUSE(R) do \
26693 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26696 #define START_USE(R) do {} while (0)
26697 #define END_USE(R) do {} while (0)
26698 #define NOT_INUSE(R) do {} while (0)
26701 if (DEFAULT_ABI == ABI_ELFv2
26702 && !TARGET_SINGLE_PIC_BASE)
26704 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26706 /* With -mminimal-toc we may generate an extra use of r2 below. */
26707 if (TARGET_TOC && TARGET_MINIMAL_TOC
26708 && !constant_pool_empty_p ())
26709 cfun->machine->r2_setup_needed = true;
26713 if (flag_stack_usage_info)
26714 current_function_static_stack_size = info->total_size;
26716 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26718 HOST_WIDE_INT size = info->total_size;
26720 if (crtl->is_leaf && !cfun->calls_alloca)
26722 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
26723 rs6000_emit_probe_stack_range (get_stack_check_protect (),
26724 size - get_stack_check_protect ());
26727 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
26730 if (TARGET_FIX_AND_CONTINUE)
26732 /* gdb on darwin arranges to forward a function from the old
26733 address by modifying the first 5 instructions of the function
26734 to branch to the overriding function. This is necessary to
26735 permit function pointers that point to the old function to
26736 actually forward to the new function. */
26737 emit_insn (gen_nop ());
26738 emit_insn (gen_nop ());
26739 emit_insn (gen_nop ());
26740 emit_insn (gen_nop ());
26741 emit_insn (gen_nop ());
26744 /* Handle world saves specially here. */
26745 if (WORLD_SAVE_P (info))
26752 /* save_world expects lr in r0. */
26753 reg0 = gen_rtx_REG (Pmode, 0);
26754 if (info->lr_save_p)
26756 insn = emit_move_insn (reg0,
26757 gen_rtx_REG (Pmode, LR_REGNO));
26758 RTX_FRAME_RELATED_P (insn) = 1;
26761 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26762 assumptions about the offsets of various bits of the stack
26764 gcc_assert (info->gp_save_offset == -220
26765 && info->fp_save_offset == -144
26766 && info->lr_save_offset == 8
26767 && info->cr_save_offset == 4
26770 && (!crtl->calls_eh_return
26771 || info->ehrd_offset == -432)
26772 && info->vrsave_save_offset == -224
26773 && info->altivec_save_offset == -416);
26775 treg = gen_rtx_REG (SImode, 11);
26776 emit_move_insn (treg, GEN_INT (-info->total_size));
26778 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26779 in R11. It also clobbers R12, so beware! */
26781 /* Preserve CR2 for save_world prologues */
26783 sz += 32 - info->first_gp_reg_save;
26784 sz += 64 - info->first_fp_reg_save;
26785 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26786 p = rtvec_alloc (sz);
26788 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, LR_REGNO);
26789 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26790 gen_rtx_SYMBOL_REF (Pmode,
26792 /* We do floats first so that the instruction pattern matches
26794 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26796 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
26797 info->first_fp_reg_save + i),
26799 info->fp_save_offset + frame_off + 8 * i);
26800 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26802 = gen_frame_store (gen_rtx_REG (V4SImode,
26803 info->first_altivec_reg_save + i),
26805 info->altivec_save_offset + frame_off + 16 * i);
26806 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26808 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26810 info->gp_save_offset + frame_off + reg_size * i);
26812 /* CR register traditionally saved as CR2. */
26814 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26815 frame_reg_rtx, info->cr_save_offset + frame_off);
26816 /* Explain about use of R0. */
26817 if (info->lr_save_p)
26819 = gen_frame_store (reg0,
26820 frame_reg_rtx, info->lr_save_offset + frame_off);
26821 /* Explain what happens to the stack pointer. */
26823 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26824 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26827 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26828 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26829 treg, GEN_INT (-info->total_size));
26830 sp_off = frame_off = info->total_size;
26833 strategy = info->savres_strategy;
26835 /* For V.4, update stack before we do any saving and set back pointer. */
26836 if (! WORLD_SAVE_P (info)
26838 && (DEFAULT_ABI == ABI_V4
26839 || crtl->calls_eh_return))
26841 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
26842 || !(strategy & SAVE_INLINE_GPRS)
26843 || !(strategy & SAVE_INLINE_VRS));
26844 int ptr_regno = -1;
26845 rtx ptr_reg = NULL_RTX;
26848 if (info->total_size < 32767)
26849 frame_off = info->total_size;
26852 else if (info->cr_save_p
26854 || info->first_fp_reg_save < 64
26855 || info->first_gp_reg_save < 32
26856 || info->altivec_size != 0
26857 || info->vrsave_size != 0
26858 || crtl->calls_eh_return)
26862 /* The prologue won't be saving any regs so there is no need
26863 to set up a frame register to access any frame save area.
26864 We also won't be using frame_off anywhere below, but set
26865 the correct value anyway to protect against future
26866 changes to this function. */
26867 frame_off = info->total_size;
26869 if (ptr_regno != -1)
26871 /* Set up the frame offset to that needed by the first
26872 out-of-line save function. */
26873 START_USE (ptr_regno);
26874 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26875 frame_reg_rtx = ptr_reg;
26876 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26877 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26878 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26879 ptr_off = info->gp_save_offset + info->gp_size;
26880 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26881 ptr_off = info->altivec_save_offset + info->altivec_size;
26882 frame_off = -ptr_off;
26884 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26886 if (REGNO (frame_reg_rtx) == 12)
26888 sp_off = info->total_size;
26889 if (frame_reg_rtx != sp_reg_rtx)
26890 rs6000_emit_stack_tie (frame_reg_rtx, false);
26893 /* If we use the link register, get it into r0. */
26894 if (!WORLD_SAVE_P (info) && info->lr_save_p
26895 && !cfun->machine->lr_is_wrapped_separately)
26897 rtx addr, reg, mem;
26899 reg = gen_rtx_REG (Pmode, 0);
26901 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26902 RTX_FRAME_RELATED_P (insn) = 1;
26904 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26905 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26907 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26908 GEN_INT (info->lr_save_offset + frame_off));
26909 mem = gen_rtx_MEM (Pmode, addr);
26910 /* This should not be of rs6000_sr_alias_set, because of
26911 __builtin_return_address. */
26913 insn = emit_move_insn (mem, reg);
26914 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26915 NULL_RTX, NULL_RTX);
26920 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26921 r12 will be needed by out-of-line gpr save. */
26922 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26923 && !(strategy & (SAVE_INLINE_GPRS
26924 | SAVE_NOINLINE_GPRS_SAVES_LR))
26926 if (!WORLD_SAVE_P (info)
26928 && REGNO (frame_reg_rtx) != cr_save_regno
26929 && !(using_static_chain_p && cr_save_regno == 11)
26930 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26932 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26933 START_USE (cr_save_regno);
26934 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26937 /* Do any required saving of fpr's. If only one or two to save, do
26938 it ourselves. Otherwise, call function. */
26939 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26941 int offset = info->fp_save_offset + frame_off;
26942 for (int i = info->first_fp_reg_save; i < 64; i++)
26945 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
26946 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
26947 sp_off - frame_off);
26949 offset += fp_reg_size;
26952 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26954 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26955 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26956 unsigned ptr_regno = ptr_regno_for_savres (sel);
26957 rtx ptr_reg = frame_reg_rtx;
26959 if (REGNO (frame_reg_rtx) == ptr_regno)
26960 gcc_checking_assert (frame_off == 0);
26963 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26964 NOT_INUSE (ptr_regno);
26965 emit_insn (gen_add3_insn (ptr_reg,
26966 frame_reg_rtx, GEN_INT (frame_off)));
26968 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26969 info->fp_save_offset,
26970 info->lr_save_offset,
26972 rs6000_frame_related (insn, ptr_reg, sp_off,
26973 NULL_RTX, NULL_RTX);
26978 /* Save GPRs. This is done as a PARALLEL if we are using
26979 the store-multiple instructions. */
26980 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
26982 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
26983 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
26984 unsigned ptr_regno = ptr_regno_for_savres (sel);
26985 rtx ptr_reg = frame_reg_rtx;
26986 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
26987 int end_save = info->gp_save_offset + info->gp_size;
26990 if (ptr_regno == 12)
26993 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26995 /* Need to adjust r11 (r12) if we saved any FPRs. */
26996 if (end_save + frame_off != 0)
26998 rtx offset = GEN_INT (end_save + frame_off);
27001 frame_off = -end_save;
27003 NOT_INUSE (ptr_regno);
27004 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27006 else if (!ptr_set_up)
27008 NOT_INUSE (ptr_regno);
27009 emit_move_insn (ptr_reg, frame_reg_rtx);
27011 ptr_off = -end_save;
27012 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27013 info->gp_save_offset + ptr_off,
27014 info->lr_save_offset + ptr_off,
27016 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27017 NULL_RTX, NULL_RTX);
27021 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27025 p = rtvec_alloc (32 - info->first_gp_reg_save);
27026 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27028 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27030 info->gp_save_offset + frame_off + reg_size * i);
27031 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27032 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27033 NULL_RTX, NULL_RTX);
27035 else if (!WORLD_SAVE_P (info))
27037 int offset = info->gp_save_offset + frame_off;
27038 for (int i = info->first_gp_reg_save; i < 32; i++)
27041 && !cfun->machine->gpr_is_wrapped_separately[i])
27042 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27043 sp_off - frame_off);
27045 offset += reg_size;
27049 if (crtl->calls_eh_return)
27056 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27057 if (regno == INVALID_REGNUM)
27061 p = rtvec_alloc (i);
27065 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27066 if (regno == INVALID_REGNUM)
27070 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27072 info->ehrd_offset + sp_off + reg_size * (int) i);
27073 RTVEC_ELT (p, i) = set;
27074 RTX_FRAME_RELATED_P (set) = 1;
27077 insn = emit_insn (gen_blockage ());
27078 RTX_FRAME_RELATED_P (insn) = 1;
27079 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27082 /* In AIX ABI we need to make sure r2 is really saved. */
27083 if (TARGET_AIX && crtl->calls_eh_return)
27085 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27086 rtx join_insn, note;
27087 rtx_insn *save_insn;
27088 long toc_restore_insn;
27090 tmp_reg = gen_rtx_REG (Pmode, 11);
27091 tmp_reg_si = gen_rtx_REG (SImode, 11);
27092 if (using_static_chain_p)
27095 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27099 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27100 /* Peek at instruction to which this function returns. If it's
27101 restoring r2, then we know we've already saved r2. We can't
27102 unconditionally save r2 because the value we have will already
27103 be updated if we arrived at this function via a plt call or
27104 toc adjusting stub. */
27105 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27106 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27107 + RS6000_TOC_SAVE_SLOT);
27108 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27109 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27110 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27111 validate_condition_mode (EQ, CCUNSmode);
27112 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27113 emit_insn (gen_rtx_SET (compare_result,
27114 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27115 toc_save_done = gen_label_rtx ();
27116 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27117 gen_rtx_EQ (VOIDmode, compare_result,
27119 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27121 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27122 JUMP_LABEL (jump) = toc_save_done;
27123 LABEL_NUSES (toc_save_done) += 1;
27125 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27126 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27127 sp_off - frame_off);
27129 emit_label (toc_save_done);
27131 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27132 have a CFG that has different saves along different paths.
27133 Move the note to a dummy blockage insn, which describes that
27134 R2 is unconditionally saved after the label. */
27135 /* ??? An alternate representation might be a special insn pattern
27136 containing both the branch and the store. That might let the
27137 code that minimizes the number of DW_CFA_advance opcodes better
27138 freedom in placing the annotations. */
27139 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27141 remove_note (save_insn, note);
27143 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27144 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27145 RTX_FRAME_RELATED_P (save_insn) = 0;
27147 join_insn = emit_insn (gen_blockage ());
27148 REG_NOTES (join_insn) = note;
27149 RTX_FRAME_RELATED_P (join_insn) = 1;
27151 if (using_static_chain_p)
27153 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27160 /* Save CR if we use any that must be preserved. */
27161 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27163 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27164 GEN_INT (info->cr_save_offset + frame_off));
27165 rtx mem = gen_frame_mem (SImode, addr);
27167 /* If we didn't copy cr before, do so now using r0. */
27168 if (cr_save_rtx == NULL_RTX)
27171 cr_save_rtx = gen_rtx_REG (SImode, 0);
27172 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27175 /* Saving CR requires a two-instruction sequence: one instruction
27176 to move the CR to a general-purpose register, and a second
27177 instruction that stores the GPR to memory.
27179 We do not emit any DWARF CFI records for the first of these,
27180 because we cannot properly represent the fact that CR is saved in
27181 a register. One reason is that we cannot express that multiple
27182 CR fields are saved; another reason is that on 64-bit, the size
27183 of the CR register in DWARF (4 bytes) differs from the size of
27184 a general-purpose register.
27186 This means if any intervening instruction were to clobber one of
27187 the call-saved CR fields, we'd have incorrect CFI. To prevent
27188 this from happening, we mark the store to memory as a use of
27189 those CR fields, which prevents any such instruction from being
27190 scheduled in between the two instructions. */
27195 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27196 for (i = 0; i < 8; i++)
27197 if (save_reg_p (CR0_REGNO + i))
27198 crsave_v[n_crsave++]
27199 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27201 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27202 gen_rtvec_v (n_crsave, crsave_v)));
27203 END_USE (REGNO (cr_save_rtx));
27205 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27206 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27207 so we need to construct a frame expression manually. */
27208 RTX_FRAME_RELATED_P (insn) = 1;
27210 /* Update address to be stack-pointer relative, like
27211 rs6000_frame_related would do. */
27212 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27213 GEN_INT (info->cr_save_offset + sp_off));
27214 mem = gen_frame_mem (SImode, addr);
27216 if (DEFAULT_ABI == ABI_ELFv2)
27218 /* In the ELFv2 ABI we generate separate CFI records for each
27219 CR field that was actually saved. They all point to the
27220 same 32-bit stack slot. */
27224 for (i = 0; i < 8; i++)
27225 if (save_reg_p (CR0_REGNO + i))
27228 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27230 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27234 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27235 gen_rtx_PARALLEL (VOIDmode,
27236 gen_rtvec_v (n_crframe, crframe)));
27240 /* In other ABIs, by convention, we use a single CR regnum to
27241 represent the fact that all call-saved CR fields are saved.
27242 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27243 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27244 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27248 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27249 *separate* slots if the routine calls __builtin_eh_return, so
27250 that they can be independently restored by the unwinder. */
27251 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27253 int i, cr_off = info->ehcr_offset;
27256 /* ??? We might get better performance by using multiple mfocrf
27258 crsave = gen_rtx_REG (SImode, 0);
27259 emit_insn (gen_prologue_movesi_from_cr (crsave));
27261 for (i = 0; i < 8; i++)
27262 if (!call_used_regs[CR0_REGNO + i])
27264 rtvec p = rtvec_alloc (2);
27266 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27268 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27270 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27272 RTX_FRAME_RELATED_P (insn) = 1;
27273 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27274 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27275 sp_reg_rtx, cr_off + sp_off));
27277 cr_off += reg_size;
27281 /* If we are emitting stack probes, but allocate no stack, then
27282 just note that in the dump file. */
27283 if (flag_stack_clash_protection
27286 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
27288 /* Update stack and set back pointer unless this is V.4,
27289 for which it was done previously. */
27290 if (!WORLD_SAVE_P (info) && info->push_p
27291 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27293 rtx ptr_reg = NULL;
27296 /* If saving altivec regs we need to be able to address all save
27297 locations using a 16-bit offset. */
27298 if ((strategy & SAVE_INLINE_VRS) == 0
27299 || (info->altivec_size != 0
27300 && (info->altivec_save_offset + info->altivec_size - 16
27301 + info->total_size - frame_off) > 32767)
27302 || (info->vrsave_size != 0
27303 && (info->vrsave_save_offset
27304 + info->total_size - frame_off) > 32767))
27306 int sel = SAVRES_SAVE | SAVRES_VR;
27307 unsigned ptr_regno = ptr_regno_for_savres (sel);
27309 if (using_static_chain_p
27310 && ptr_regno == STATIC_CHAIN_REGNUM)
27312 if (REGNO (frame_reg_rtx) != ptr_regno)
27313 START_USE (ptr_regno);
27314 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27315 frame_reg_rtx = ptr_reg;
27316 ptr_off = info->altivec_save_offset + info->altivec_size;
27317 frame_off = -ptr_off;
27319 else if (REGNO (frame_reg_rtx) == 1)
27320 frame_off = info->total_size;
27321 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27323 if (REGNO (frame_reg_rtx) == 12)
27325 sp_off = info->total_size;
27326 if (frame_reg_rtx != sp_reg_rtx)
27327 rs6000_emit_stack_tie (frame_reg_rtx, false);
27330 /* Set frame pointer, if needed. */
27331 if (frame_pointer_needed)
27333 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27335 RTX_FRAME_RELATED_P (insn) = 1;
27338 /* Save AltiVec registers if needed. Save here because the red zone does
27339 not always include AltiVec registers. */
27340 if (!WORLD_SAVE_P (info)
27341 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27343 int end_save = info->altivec_save_offset + info->altivec_size;
27345 /* Oddly, the vector save/restore functions point r0 at the end
27346 of the save area, then use r11 or r12 to load offsets for
27347 [reg+reg] addressing. */
27348 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27349 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27350 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27352 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27354 if (scratch_regno == 12)
27356 if (end_save + frame_off != 0)
27358 rtx offset = GEN_INT (end_save + frame_off);
27360 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27363 emit_move_insn (ptr_reg, frame_reg_rtx);
27365 ptr_off = -end_save;
27366 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27367 info->altivec_save_offset + ptr_off,
27368 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27369 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27370 NULL_RTX, NULL_RTX);
27371 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27373 /* The oddity mentioned above clobbered our frame reg. */
27374 emit_move_insn (frame_reg_rtx, ptr_reg);
27375 frame_off = ptr_off;
27378 else if (!WORLD_SAVE_P (info)
27379 && info->altivec_size != 0)
27383 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27384 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27386 rtx areg, savereg, mem;
27387 HOST_WIDE_INT offset;
27389 offset = (info->altivec_save_offset + frame_off
27390 + 16 * (i - info->first_altivec_reg_save));
27392 savereg = gen_rtx_REG (V4SImode, i);
27394 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27396 mem = gen_frame_mem (V4SImode,
27397 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27398 GEN_INT (offset)));
27399 insn = emit_insn (gen_rtx_SET (mem, savereg));
27405 areg = gen_rtx_REG (Pmode, 0);
27406 emit_move_insn (areg, GEN_INT (offset));
27408 /* AltiVec addressing mode is [reg+reg]. */
27409 mem = gen_frame_mem (V4SImode,
27410 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27412 /* Rather than emitting a generic move, force use of the stvx
27413 instruction, which we always want on ISA 2.07 (power8) systems.
27414 In particular we don't want xxpermdi/stxvd2x for little
27416 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27419 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27420 areg, GEN_INT (offset));
27424 /* VRSAVE is a bit vector representing which AltiVec registers
27425 are used. The OS uses this to determine which vector
27426 registers to save on a context switch. We need to save
27427 VRSAVE on the stack frame, add whatever AltiVec registers we
27428 used in this function, and do the corresponding magic in the
27431 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27433 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27434 be using r12 as frame_reg_rtx and r11 as the static chain
27435 pointer for nested functions. */
27436 int save_regno = 12;
27437 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27438 && !using_static_chain_p)
27440 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27443 if (using_static_chain_p)
27446 NOT_INUSE (save_regno);
27448 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27451 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27452 if (!TARGET_SINGLE_PIC_BASE
27453 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27454 && !constant_pool_empty_p ())
27455 || (DEFAULT_ABI == ABI_V4
27456 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27457 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27459 /* If emit_load_toc_table will use the link register, we need to save
27460 it. We use R12 for this purpose because emit_load_toc_table
27461 can use register 0. This allows us to use a plain 'blr' to return
27462 from the procedure more often. */
27463 int save_LR_around_toc_setup = (TARGET_ELF
27464 && DEFAULT_ABI == ABI_V4
27466 && ! info->lr_save_p
27467 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27468 if (save_LR_around_toc_setup)
27470 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27471 rtx tmp = gen_rtx_REG (Pmode, 12);
27474 insn = emit_move_insn (tmp, lr);
27475 RTX_FRAME_RELATED_P (insn) = 1;
27477 rs6000_emit_load_toc_table (TRUE);
27479 insn = emit_move_insn (lr, tmp);
27480 add_reg_note (insn, REG_CFA_RESTORE, lr);
27481 RTX_FRAME_RELATED_P (insn) = 1;
27484 rs6000_emit_load_toc_table (TRUE);
27488 if (!TARGET_SINGLE_PIC_BASE
27489 && DEFAULT_ABI == ABI_DARWIN
27490 && flag_pic && crtl->uses_pic_offset_table)
27492 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27493 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27495 /* Save and restore LR locally around this call (in R0). */
27496 if (!info->lr_save_p)
27497 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27499 emit_insn (gen_load_macho_picbase (src));
27501 emit_move_insn (gen_rtx_REG (Pmode,
27502 RS6000_PIC_OFFSET_TABLE_REGNUM),
27505 if (!info->lr_save_p)
27506 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27510 /* If we need to, save the TOC register after doing the stack setup.
27511 Do not emit eh frame info for this save. The unwinder wants info,
27512 conceptually attached to instructions in this function, about
27513 register values in the caller of this function. This R2 may have
27514 already been changed from the value in the caller.
27515 We don't attempt to write accurate DWARF EH frame info for R2
27516 because code emitted by gcc for a (non-pointer) function call
27517 doesn't save and restore R2. Instead, R2 is managed out-of-line
27518 by a linker generated plt call stub when the function resides in
27519 a shared library. This behavior is costly to describe in DWARF,
27520 both in terms of the size of DWARF info and the time taken in the
27521 unwinder to interpret it. R2 changes, apart from the
27522 calls_eh_return case earlier in this function, are handled by
27523 linux-unwind.h frob_update_context. */
27524 if (rs6000_save_toc_in_prologue_p ()
27525 && !cfun->machine->toc_is_wrapped_separately)
27527 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27528 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27531 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27532 if (using_split_stack && split_stack_arg_pointer_used_p ())
27533 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27536 /* Output .extern statements for the save/restore routines we use. */
/* Emits "\t.extern NAME" for every out-of-line FPR save and restore
   helper routine this function will call (chosen by savres_strategy),
   so the assembler sees the external references.  Helper names come
   from rs6000_savres_routine_name, keyed on the first saved FPR and a
   SAVRES_* selector.  NOTE(review): this extract has elided lines
   (braces and some declarations are missing); the visible code text is
   kept byte-identical.  */
27539 rs6000_output_savres_externs (FILE *file)
27541 rs6000_stack_t *info = rs6000_stack_info ();
/* Optionally dump the computed stack frame layout for debugging.  */
27543 if (TARGET_DEBUG_STACK)
27544 debug_stack_info (info);
27546 /* Write .extern for any function we will call to save and restore
/* Only relevant when some FPRs (regs 32..63) are actually saved.  */
27548 if (info->first_fp_reg_save < 64
27553 int regno = info->first_fp_reg_save - 32;
/* Out-of-line FPR save helper: selector includes SAVRES_LR when the
   helper variant also saves the link register.  */
27555 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27557 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27558 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27559 name = rs6000_savres_routine_name (regno, sel);
27560 fprintf (file, "\t.extern %s\n", name);
/* Out-of-line FPR restore helper; note the LR flag is inverted here:
   set unless the strategy says the helper does NOT restore LR.  */
27562 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27564 bool lr = (info->savres_strategy
27565 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27566 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27567 name = rs6000_savres_routine_name (regno, sel);
27568 fprintf (file, "\t.extern %s\n", name);
27573 /* Write function prologue. */
/* Target hook that emits textual (assembly) prologue material for the
   current function: .extern directives for out-of-line save/restore
   helpers, the ELFv2 global-entry r2 (TOC pointer) setup followed by
   the .localentry directive, and -mprofile-kernel mcount call code.
   NOTE(review): this extract has elided lines (braces and some
   declarations are missing); the visible code text is kept
   byte-identical.  */
27576 rs6000_output_function_prologue (FILE *file)
/* Thunks don't use the out-of-line save/restore helpers.  */
27578 if (!cfun->is_thunk)
27579 rs6000_output_savres_externs (file);
27581 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27582 immediately after the global entry point label. */
27583 if (rs6000_global_entry_point_needed_p ())
27585 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
/* "LCF<n>" labels the point r12 addresses on entry, used below to
   form .TOC.-relative expressions.  */
27587 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27589 if (TARGET_CMODEL != CMODEL_LARGE)
27591 /* In the small and medium code models, we assume the TOC is less
27592 than 2 GB away from the text section, so it can be computed via the
27593 following two-instruction sequence. */
27596 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
/* addis 2,12,(.TOC.-LCF)@ha ; addi 2,2,(.TOC.-LCF)@l computes the
   TOC pointer into r2 from the entry address in r12.  */
27597 fprintf (file, "0:\taddis 2,12,.TOC.-");
27598 assemble_name (file, buf);
27599 fprintf (file, "@ha\n");
27600 fprintf (file, "\taddi 2,2,.TOC.-");
27601 assemble_name (file, buf);
27602 fprintf (file, "@l\n");
27606 /* In the large code model, we allow arbitrary offsets between the
27607 TOC and the text section, so we have to load the offset from
27608 memory. The data field is emitted directly before the global
27609 entry point in rs6000_elf_declare_function_name. */
27612 #ifdef HAVE_AS_ENTRY_MARKERS
27613 /* If supported by the linker, emit a marker relocation. If the
27614 total code size of the final executable or shared library
27615 happens to fit into 2 GB after all, the linker will replace
27616 this code sequence with the sequence for the small or medium
27618 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
/* ld 2,LCL-LCF(12) ; add 2,2,12 — load the stored TOC offset and add
   the entry address to form the TOC pointer.  */
27620 fprintf (file, "\tld 2,");
27621 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27622 assemble_name (file, buf);
27623 fprintf (file, "-");
27624 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27625 assemble_name (file, buf);
27626 fprintf (file, "(12)\n");
27627 fprintf (file, "\tadd 2,2,12\n");
/* ".localentry NAME,.-NAME" records the local entry point's offset
   from the global entry point in the symbol.  */
27630 fputs ("\t.localentry\t", file);
27631 assemble_name (file, name);
27632 fputs (",.-", file);
27633 assemble_name (file, name);
27634 fputs ("\n", file);
27637 /* Output -mprofile-kernel code. This needs to be done here instead of
27638 in output_function_profile since it must go after the ELFv2 ABI
27639 local entry point. */
27640 if (TARGET_PROFILE_KERNEL && crtl->profile)
/* -mprofile-kernel is only supported for 64-bit AIX/ELFv2 ABIs.  */
27642 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27643 gcc_assert (!TARGET_32BIT);
27645 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27647 /* In the ELFv2 ABI we have no compiler stack word. It must be
27648 the responsibility of _mcount to preserve the static chain
27649 register if required. */
27650 if (DEFAULT_ABI != ABI_ELFv2
27651 && cfun->static_chain_decl != NULL)
/* Save/restore the static chain register around the _mcount call.  */
27653 asm_fprintf (file, "\tstd %s,24(%s)\n",
27654 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27655 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27656 asm_fprintf (file, "\tld %s,24(%s)\n",
27657 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27660 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
/* Advance the label counter so the next function gets fresh LCF/LCL
   labels.  */
27663 rs6000_pic_labelno++;
27666 /* -mprofile-kernel code calls mcount before the function prolog,
27667 so a profiled leaf function should stay a leaf function. */
/* Returns nonzero exactly when -mprofile-kernel is in effect.
   NOTE(review): presumably implements the keep_leaf_when_profiled
   target hook — confirm against the TARGET_* hook definitions.  */
27669 rs6000_keep_leaf_when_profiled ()
27671 return TARGET_PROFILE_KERNEL;
27674 /* Non-zero if vmx regs are restored before the frame pop, zero if
27675 we restore after the pop when possible. */
27676 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27678 /* Restoring cr is a two step process: loading a reg from the frame
27679 save, then moving the reg to cr. For ABI_V4 we must let the
27680 unwinder know that the stack location is no longer valid at or
27681 before the stack deallocation, but we can't emit a cfa_restore for
27682 cr at the stack deallocation like we do for other registers.
27683 The trouble is that it is possible for the move to cr to be
27684 scheduled after the stack deallocation. So say exactly where cr
27685 is located on each of the two insns. */
/* Loads the saved CR word from FRAME_REG_RTX+OFFSET into GPR REGNO.
   EXIT_FUNC is true when this load is part of a function-exit
   sequence.  NOTE(review): lines are elided in this extract; code
   kept byte-identical.  */
27688 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27690 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27691 rtx reg = gen_rtx_REG (SImode, regno);
27692 rtx_insn *insn = emit_move_insn (reg, mem);
/* For V.4, annotate the load with a REG_CFA_REGISTER note saying the
   CR value (conventionally CR2) now lives in REG, per the comment
   above.  */
27694 if (!exit_func && DEFAULT_ABI == ABI_V4)
27696 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27697 rtx set = gen_rtx_SET (reg, cr);
27699 add_reg_note (insn, REG_CFA_REGISTER, set);
27700 RTX_FRAME_RELATED_P (insn) = 1;
27705 /* Reload CR from REG. */
/* Moves the CR word held in GPR REG back into the call-saved CR
   fields, either with one multi-field mtcrf-style PARALLEL (when
   USING_MFCR_MULTIPLE and more than one field needs restoring) or
   with one move per field.  Emits REG_CFA_RESTORE notes as required
   by the ABI when not at function exit.  NOTE(review): lines are
   elided in this extract; code kept byte-identical.  */
27708 restore_saved_cr (rtx reg, bool using_mfcr_multiple, bool exit_func)
/* Count how many CR fields must be restored.  */
27713 if (using_mfcr_multiple)
27715 for (i = 0; i < 8; i++)
27716 if (save_reg_p (CR0_REGNO + i))
27718 gcc_assert (count);
27721 if (using_mfcr_multiple && count > 1)
/* Build one PARALLEL restoring all saved CR fields at once; each
   element is an UNSPEC_MOVESI_TO_CR with a one-hot field mask.  */
27727 p = rtvec_alloc (count);
27730 for (i = 0; i < 8; i++)
27731 if (save_reg_p (CR0_REGNO + i))
27733 rtvec r = rtvec_alloc (2);
27734 RTVEC_ELT (r, 0) = reg;
/* Bit 7-i selects CR field i (mask is MSB-first).  */
27735 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27736 RTVEC_ELT (p, ndx) =
27737 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27738 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27741 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27742 gcc_assert (ndx == count);
27744 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27745 CR field separately. */
27746 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27748 for (i = 0; i < 8; i++)
27749 if (save_reg_p (CR0_REGNO + i))
27750 add_reg_note (insn, REG_CFA_RESTORE,
27751 gen_rtx_REG (SImode, CR0_REGNO + i));
27753 RTX_FRAME_RELATED_P (insn) = 1;
/* Single-field path: restore each saved CR field with its own
   movsi_to_cr_one insn.  */
27757 for (i = 0; i < 8; i++)
27758 if (save_reg_p (CR0_REGNO + i))
27760 rtx insn = emit_insn (gen_movsi_to_cr_one
27761 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27763 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27764 CR field separately, attached to the insn that in fact
27765 restores this particular CR field. */
27766 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27768 add_reg_note (insn, REG_CFA_RESTORE,
27769 gen_rtx_REG (SImode, CR0_REGNO + i));
27771 RTX_FRAME_RELATED_P (insn) = 1;
27775 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27776 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27777 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
/* Attach the CR2 restore note to whatever insn was emitted last.  */
27779 rtx_insn *insn = get_last_insn ();
27780 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27782 add_reg_note (insn, REG_CFA_RESTORE, cr);
27783 RTX_FRAME_RELATED_P (insn) = 1;
27787 /* Like cr, the move to lr instruction can be scheduled after the
27788 stack deallocation, but unlike cr, its stack frame save is still
27789 valid. So we only need to emit the cfa_restore on the correct
/* Loads the saved link register value from FRAME_REG_RTX+OFFSET into
   GPR REGNO; no CFI notes are emitted here (see comment above).
   NOTE(review): lines are elided in this extract; code kept
   byte-identical.  */
27793 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27795 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27796 rtx reg = gen_rtx_REG (Pmode, regno);
27798 emit_move_insn (reg, mem);
/* Moves the LR value held in GPR REGNO back into the link register.
   When shrink-wrapping and not at function exit, attach the
   REG_CFA_RESTORE note for LR to this move.  NOTE(review): lines are
   elided in this extract; code kept byte-identical.  */
27802 restore_saved_lr (int regno, bool exit_func)
27804 rtx reg = gen_rtx_REG (Pmode, regno);
27805 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27806 rtx_insn *insn = emit_move_insn (lr, reg);
27808 if (!exit_func && flag_shrink_wrap)
27810 add_reg_note (insn, REG_CFA_RESTORE, lr);
27811 RTX_FRAME_RELATED_P (insn) = 1;
/* Prepends REG_CFA_RESTORE notes for CR and LR onto the CFA_RESTORES
   note chain and returns the new chain head.  ELFv2 gets one note per
   saved CR field; other ABIs get a single CR2 note (when CR was saved
   at all).  NOTE(review): lines are elided in this extract; code kept
   byte-identical.  */
27816 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27818 if (DEFAULT_ABI == ABI_ELFv2)
27821 for (i = 0; i < 8; i++)
27822 if (save_reg_p (CR0_REGNO + i))
27824 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27825 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
/* Non-ELFv2: by convention a single CR2 note stands for all saved
   CR fields.  */
27829 else if (info->cr_save_p)
27830 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27831 gen_rtx_REG (SImode, CR2_REGNO),
27834 if (info->lr_save_p)
27835 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27836 gen_rtx_REG (Pmode, LR_REGNO),
27838 return cfa_restores;
27841 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27842 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
27843 below stack pointer not clobbered by signals. */
/* I.e. OFFSET is below the red zone: any offset < 0 on V.4, below
   -220 (32-bit) or -288 (64-bit) on the AIX-style ABIs.
   NOTE(review): the V.4 arm of the conditional is elided in this
   extract; visible code kept byte-identical.  */
27846 offset_below_red_zone_p (HOST_WIDE_INT offset)
27848 return offset < (DEFAULT_ABI == ABI_V4
27850 : TARGET_32BIT ? -220 : -288);
27853 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27856 emit_cfa_restores (rtx cfa_restores)
27858 rtx_insn *insn = get_last_insn ();
27859 rtx *loc = ®_NOTES (insn);
27862 loc = &XEXP (*loc, 1);
27863 *loc = cfa_restores;
27864 RTX_FRAME_RELATED_P (insn) = 1;
27867 /* Emit function epilogue as insns. */
27870 rs6000_emit_epilogue (enum epilogue_type epilogue_type)
27872 HOST_WIDE_INT frame_off = 0;
27873 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27874 rtx frame_reg_rtx = sp_reg_rtx;
27875 rtx cfa_restores = NULL_RTX;
27877 rtx cr_save_reg = NULL_RTX;
27878 machine_mode reg_mode = Pmode;
27879 int reg_size = TARGET_32BIT ? 4 : 8;
27880 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
27881 int fp_reg_size = 8;
27883 unsigned ptr_regno;
27885 rs6000_stack_t *info = rs6000_stack_info ();
27887 if (epilogue_type == EPILOGUE_TYPE_NORMAL && crtl->calls_eh_return)
27888 epilogue_type = EPILOGUE_TYPE_EH_RETURN;
27890 int strategy = info->savres_strategy;
27891 bool using_load_multiple = !!(strategy & REST_MULTIPLE);
27892 bool restoring_GPRs_inline = !!(strategy & REST_INLINE_GPRS);
27893 bool restoring_FPRs_inline = !!(strategy & REST_INLINE_FPRS);
27894 if (epilogue_type == EPILOGUE_TYPE_SIBCALL)
27896 restoring_GPRs_inline = true;
27897 restoring_FPRs_inline = true;
27900 bool using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
27901 || rs6000_tune == PROCESSOR_PPC603
27902 || rs6000_tune == PROCESSOR_PPC750
27905 /* Restore via the backchain when we have a large frame, since this
27906 is more efficient than an addis, addi pair. The second condition
27907 here will not trigger at the moment; We don't actually need a
27908 frame pointer for alloca, but the generic parts of the compiler
27909 give us one anyway. */
27910 bool use_backchain_to_restore_sp
27911 = (info->total_size + (info->lr_save_p ? info->lr_save_offset : 0) > 32767
27912 || (cfun->calls_alloca && !frame_pointer_needed));
27914 bool restore_lr = (info->lr_save_p
27915 && (restoring_FPRs_inline
27916 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27917 && (restoring_GPRs_inline
27918 || info->first_fp_reg_save < 64)
27919 && !cfun->machine->lr_is_wrapped_separately);
27922 if (WORLD_SAVE_P (info))
27924 gcc_assert (epilogue_type != EPILOGUE_TYPE_SIBCALL);
27926 /* eh_rest_world_r10 will return to the location saved in the LR
27927 stack slot (which is not likely to be our caller.)
27928 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27929 rest_world is similar, except any R10 parameter is ignored.
27930 The exception-handling stuff that was here in 2.95 is no
27931 longer necessary. */
27935 + 32 - info->first_gp_reg_save
27936 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27937 + 63 + 1 - info->first_fp_reg_save);
27940 switch (epilogue_type)
27942 case EPILOGUE_TYPE_NORMAL:
27943 rname = ggc_strdup ("*rest_world");
27946 case EPILOGUE_TYPE_EH_RETURN:
27947 rname = ggc_strdup ("*eh_rest_world_r10");
27951 gcc_unreachable ();
27955 RTVEC_ELT (p, j++) = ret_rtx;
27957 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, rname));
27958 /* The instruction pattern requires a clobber here;
27959 it is shared with the restVEC helper. */
27960 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (Pmode, 11);
27963 /* CR register traditionally saved as CR2. */
27964 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27966 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27967 if (flag_shrink_wrap)
27969 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27970 gen_rtx_REG (Pmode, LR_REGNO),
27972 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27977 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27979 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27981 = gen_frame_load (reg,
27982 frame_reg_rtx, info->gp_save_offset + reg_size * i);
27983 if (flag_shrink_wrap
27984 && save_reg_p (info->first_gp_reg_save + i))
27985 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27987 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27989 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
27991 = gen_frame_load (reg,
27992 frame_reg_rtx, info->altivec_save_offset + 16 * i);
27993 if (flag_shrink_wrap
27994 && save_reg_p (info->first_altivec_reg_save + i))
27995 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27997 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
27999 rtx reg = gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
28000 info->first_fp_reg_save + i);
28002 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28003 if (flag_shrink_wrap
28004 && save_reg_p (info->first_fp_reg_save + i))
28005 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28007 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (Pmode, 0);
28008 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 12);
28009 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 7);
28010 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 8);
28012 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28013 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28015 if (flag_shrink_wrap)
28017 REG_NOTES (insn) = cfa_restores;
28018 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28019 RTX_FRAME_RELATED_P (insn) = 1;
28024 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28026 frame_off = info->total_size;
28028 /* Restore AltiVec registers if we must do so before adjusting the
28030 if (info->altivec_size != 0
28031 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28032 || (DEFAULT_ABI != ABI_V4
28033 && offset_below_red_zone_p (info->altivec_save_offset))))
28036 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28038 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28039 if (use_backchain_to_restore_sp)
28041 int frame_regno = 11;
28043 if ((strategy & REST_INLINE_VRS) == 0)
28045 /* Of r11 and r12, select the one not clobbered by an
28046 out-of-line restore function for the frame register. */
28047 frame_regno = 11 + 12 - scratch_regno;
28049 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28050 emit_move_insn (frame_reg_rtx,
28051 gen_rtx_MEM (Pmode, sp_reg_rtx));
28054 else if (frame_pointer_needed)
28055 frame_reg_rtx = hard_frame_pointer_rtx;
28057 if ((strategy & REST_INLINE_VRS) == 0)
28059 int end_save = info->altivec_save_offset + info->altivec_size;
28061 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28062 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28064 if (end_save + frame_off != 0)
28066 rtx offset = GEN_INT (end_save + frame_off);
28068 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28071 emit_move_insn (ptr_reg, frame_reg_rtx);
28073 ptr_off = -end_save;
28074 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28075 info->altivec_save_offset + ptr_off,
28076 0, V4SImode, SAVRES_VR);
28080 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28081 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28083 rtx addr, areg, mem, insn;
28084 rtx reg = gen_rtx_REG (V4SImode, i);
28085 HOST_WIDE_INT offset
28086 = (info->altivec_save_offset + frame_off
28087 + 16 * (i - info->first_altivec_reg_save));
28089 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28091 mem = gen_frame_mem (V4SImode,
28092 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28093 GEN_INT (offset)));
28094 insn = gen_rtx_SET (reg, mem);
28098 areg = gen_rtx_REG (Pmode, 0);
28099 emit_move_insn (areg, GEN_INT (offset));
28101 /* AltiVec addressing mode is [reg+reg]. */
28102 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28103 mem = gen_frame_mem (V4SImode, addr);
28105 /* Rather than emitting a generic move, force use of the
28106 lvx instruction, which we always want. In particular we
28107 don't want lxvd2x/xxpermdi for little endian. */
28108 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28111 (void) emit_insn (insn);
28115 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28116 if (((strategy & REST_INLINE_VRS) == 0
28117 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28118 && (flag_shrink_wrap
28119 || (offset_below_red_zone_p
28120 (info->altivec_save_offset
28121 + 16 * (i - info->first_altivec_reg_save))))
28124 rtx reg = gen_rtx_REG (V4SImode, i);
28125 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28129 /* Restore VRSAVE if we must do so before adjusting the stack. */
28130 if (info->vrsave_size != 0
28131 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28132 || (DEFAULT_ABI != ABI_V4
28133 && offset_below_red_zone_p (info->vrsave_save_offset))))
28137 if (frame_reg_rtx == sp_reg_rtx)
28139 if (use_backchain_to_restore_sp)
28141 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28142 emit_move_insn (frame_reg_rtx,
28143 gen_rtx_MEM (Pmode, sp_reg_rtx));
28146 else if (frame_pointer_needed)
28147 frame_reg_rtx = hard_frame_pointer_rtx;
28150 reg = gen_rtx_REG (SImode, 12);
28151 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28152 info->vrsave_save_offset + frame_off));
28154 emit_insn (generate_set_vrsave (reg, info, 1));
28158 /* If we have a large stack frame, restore the old stack pointer
28159 using the backchain. */
28160 if (use_backchain_to_restore_sp)
28162 if (frame_reg_rtx == sp_reg_rtx)
28164 /* Under V.4, don't reset the stack pointer until after we're done
28165 loading the saved registers. */
28166 if (DEFAULT_ABI == ABI_V4)
28167 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28169 insn = emit_move_insn (frame_reg_rtx,
28170 gen_rtx_MEM (Pmode, sp_reg_rtx));
28173 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28174 && DEFAULT_ABI == ABI_V4)
28175 /* frame_reg_rtx has been set up by the altivec restore. */
28179 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28180 frame_reg_rtx = sp_reg_rtx;
28183 /* If we have a frame pointer, we can restore the old stack pointer
28185 else if (frame_pointer_needed)
28187 frame_reg_rtx = sp_reg_rtx;
28188 if (DEFAULT_ABI == ABI_V4)
28189 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28190 /* Prevent reordering memory accesses against stack pointer restore. */
28191 else if (cfun->calls_alloca
28192 || offset_below_red_zone_p (-info->total_size))
28193 rs6000_emit_stack_tie (frame_reg_rtx, true);
28195 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28196 GEN_INT (info->total_size)));
28199 else if (info->push_p
28200 && DEFAULT_ABI != ABI_V4
28201 && epilogue_type != EPILOGUE_TYPE_EH_RETURN)
28203 /* Prevent reordering memory accesses against stack pointer restore. */
28204 if (cfun->calls_alloca
28205 || offset_below_red_zone_p (-info->total_size))
28206 rs6000_emit_stack_tie (frame_reg_rtx, false);
28207 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28208 GEN_INT (info->total_size)));
28211 if (insn && frame_reg_rtx == sp_reg_rtx)
28215 REG_NOTES (insn) = cfa_restores;
28216 cfa_restores = NULL_RTX;
28218 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28219 RTX_FRAME_RELATED_P (insn) = 1;
28222 /* Restore AltiVec registers if we have not done so already. */
28223 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28224 && info->altivec_size != 0
28225 && (DEFAULT_ABI == ABI_V4
28226 || !offset_below_red_zone_p (info->altivec_save_offset)))
28230 if ((strategy & REST_INLINE_VRS) == 0)
28232 int end_save = info->altivec_save_offset + info->altivec_size;
28234 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28235 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28236 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28238 if (end_save + frame_off != 0)
28240 rtx offset = GEN_INT (end_save + frame_off);
28242 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28245 emit_move_insn (ptr_reg, frame_reg_rtx);
28247 ptr_off = -end_save;
28248 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28249 info->altivec_save_offset + ptr_off,
28250 0, V4SImode, SAVRES_VR);
28251 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28253 /* Frame reg was clobbered by out-of-line save. Restore it
28254 from ptr_reg, and if we are calling out-of-line gpr or
28255 fpr restore set up the correct pointer and offset. */
28256 unsigned newptr_regno = 1;
28257 if (!restoring_GPRs_inline)
28259 bool lr = info->gp_save_offset + info->gp_size == 0;
28260 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28261 newptr_regno = ptr_regno_for_savres (sel);
28262 end_save = info->gp_save_offset + info->gp_size;
28264 else if (!restoring_FPRs_inline)
28266 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28267 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28268 newptr_regno = ptr_regno_for_savres (sel);
28269 end_save = info->fp_save_offset + info->fp_size;
28272 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28273 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28275 if (end_save + ptr_off != 0)
28277 rtx offset = GEN_INT (end_save + ptr_off);
28279 frame_off = -end_save;
28281 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28284 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28289 frame_off = ptr_off;
28290 emit_move_insn (frame_reg_rtx, ptr_reg);
28296 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28297 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28299 rtx addr, areg, mem, insn;
28300 rtx reg = gen_rtx_REG (V4SImode, i);
28301 HOST_WIDE_INT offset
28302 = (info->altivec_save_offset + frame_off
28303 + 16 * (i - info->first_altivec_reg_save));
28305 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28307 mem = gen_frame_mem (V4SImode,
28308 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28309 GEN_INT (offset)));
28310 insn = gen_rtx_SET (reg, mem);
28314 areg = gen_rtx_REG (Pmode, 0);
28315 emit_move_insn (areg, GEN_INT (offset));
28317 /* AltiVec addressing mode is [reg+reg]. */
28318 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28319 mem = gen_frame_mem (V4SImode, addr);
28321 /* Rather than emitting a generic move, force use of the
28322 lvx instruction, which we always want. In particular we
28323 don't want lxvd2x/xxpermdi for little endian. */
28324 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28327 (void) emit_insn (insn);
28331 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28332 if (((strategy & REST_INLINE_VRS) == 0
28333 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28334 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28337 rtx reg = gen_rtx_REG (V4SImode, i);
28338 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28342 /* Restore VRSAVE if we have not done so already. */
28343 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28344 && info->vrsave_size != 0
28345 && (DEFAULT_ABI == ABI_V4
28346 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28350 reg = gen_rtx_REG (SImode, 12);
28351 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28352 info->vrsave_save_offset + frame_off));
28354 emit_insn (generate_set_vrsave (reg, info, 1));
28357 /* If we exit by an out-of-line restore function on ABI_V4 then that
28358 function will deallocate the stack, so we don't need to worry
28359 about the unwinder restoring cr from an invalid stack frame
28361 bool exit_func = (!restoring_FPRs_inline
28362 || (!restoring_GPRs_inline
28363 && info->first_fp_reg_save == 64));
28365 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28366 *separate* slots if the routine calls __builtin_eh_return, so
28367 that they can be independently restored by the unwinder. */
28368 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28370 int i, cr_off = info->ehcr_offset;
28372 for (i = 0; i < 8; i++)
28373 if (!call_used_regs[CR0_REGNO + i])
28375 rtx reg = gen_rtx_REG (SImode, 0);
28376 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28377 cr_off + frame_off));
28379 insn = emit_insn (gen_movsi_to_cr_one
28380 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28382 if (!exit_func && flag_shrink_wrap)
28384 add_reg_note (insn, REG_CFA_RESTORE,
28385 gen_rtx_REG (SImode, CR0_REGNO + i));
28387 RTX_FRAME_RELATED_P (insn) = 1;
28390 cr_off += reg_size;
28394 /* Get the old lr if we saved it. If we are restoring registers
28395 out-of-line, then the out-of-line routines can do this for us. */
28396 if (restore_lr && restoring_GPRs_inline)
28397 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28399 /* Get the old cr if we saved it. */
28400 if (info->cr_save_p)
28402 unsigned cr_save_regno = 12;
28404 if (!restoring_GPRs_inline)
28406 /* Ensure we don't use the register used by the out-of-line
28407 gpr register restore below. */
28408 bool lr = info->gp_save_offset + info->gp_size == 0;
28409 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28410 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28412 if (gpr_ptr_regno == 12)
28413 cr_save_regno = 11;
28414 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28416 else if (REGNO (frame_reg_rtx) == 12)
28417 cr_save_regno = 11;
28419 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28420 info->cr_save_offset + frame_off,
28424 /* Set LR here to try to overlap restores below. */
28425 if (restore_lr && restoring_GPRs_inline)
28426 restore_saved_lr (0, exit_func);
28428 /* Load exception handler data registers, if needed. */
28429 if (epilogue_type == EPILOGUE_TYPE_EH_RETURN)
28431 unsigned int i, regno;
28435 rtx reg = gen_rtx_REG (reg_mode, 2);
28436 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28437 frame_off + RS6000_TOC_SAVE_SLOT));
28444 regno = EH_RETURN_DATA_REGNO (i);
28445 if (regno == INVALID_REGNUM)
28448 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28449 info->ehrd_offset + frame_off
28450 + reg_size * (int) i);
28452 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28456 /* Restore GPRs. This is done as a PARALLEL if we are using
28457 the load-multiple instructions. */
28458 if (!restoring_GPRs_inline)
28460 /* We are jumping to an out-of-line function. */
28462 int end_save = info->gp_save_offset + info->gp_size;
28463 bool can_use_exit = end_save == 0;
28464 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28467 /* Emit stack reset code if we need it. */
28468 ptr_regno = ptr_regno_for_savres (sel);
28469 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28471 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28472 else if (end_save + frame_off != 0)
28473 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28474 GEN_INT (end_save + frame_off)));
28475 else if (REGNO (frame_reg_rtx) != ptr_regno)
28476 emit_move_insn (ptr_reg, frame_reg_rtx);
28477 if (REGNO (frame_reg_rtx) == ptr_regno)
28478 frame_off = -end_save;
28480 if (can_use_exit && info->cr_save_p)
28481 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28483 ptr_off = -end_save;
28484 rs6000_emit_savres_rtx (info, ptr_reg,
28485 info->gp_save_offset + ptr_off,
28486 info->lr_save_offset + ptr_off,
28489 else if (using_load_multiple)
28492 p = rtvec_alloc (32 - info->first_gp_reg_save);
28493 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28495 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28497 info->gp_save_offset + frame_off + reg_size * i);
28498 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28502 int offset = info->gp_save_offset + frame_off;
28503 for (i = info->first_gp_reg_save; i < 32; i++)
28506 && !cfun->machine->gpr_is_wrapped_separately[i])
28508 rtx reg = gen_rtx_REG (reg_mode, i);
28509 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28512 offset += reg_size;
28516 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28518 /* If the frame pointer was used then we can't delay emitting
28519 a REG_CFA_DEF_CFA note. This must happen on the insn that
28520 restores the frame pointer, r31. We may have already emitted
28521 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
28522 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28523 be harmless if emitted. */
28524 if (frame_pointer_needed)
28526 insn = get_last_insn ();
28527 add_reg_note (insn, REG_CFA_DEF_CFA,
28528 plus_constant (Pmode, frame_reg_rtx, frame_off));
28529 RTX_FRAME_RELATED_P (insn) = 1;
28532 /* Set up cfa_restores. We always need these when
28533 shrink-wrapping. If not shrink-wrapping then we only need
28534 the cfa_restore when the stack location is no longer valid.
28535 The cfa_restores must be emitted on or before the insn that
28536 invalidates the stack, and of course must not be emitted
28537 before the insn that actually does the restore. The latter
28538 is why it is a bad idea to emit the cfa_restores as a group
28539 on the last instruction here that actually does a restore:
28540 That insn may be reordered with respect to others doing
28542 if (flag_shrink_wrap
28543 && !restoring_GPRs_inline
28544 && info->first_fp_reg_save == 64)
28545 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28547 for (i = info->first_gp_reg_save; i < 32; i++)
28549 && !cfun->machine->gpr_is_wrapped_separately[i])
28551 rtx reg = gen_rtx_REG (reg_mode, i);
28552 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28556 if (!restoring_GPRs_inline
28557 && info->first_fp_reg_save == 64)
28559 /* We are jumping to an out-of-line function. */
28561 emit_cfa_restores (cfa_restores);
28565 if (restore_lr && !restoring_GPRs_inline)
28567 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28568 restore_saved_lr (0, exit_func);
28571 /* Restore fpr's if we need to do it without calling a function. */
28572 if (restoring_FPRs_inline)
28574 int offset = info->fp_save_offset + frame_off;
28575 for (i = info->first_fp_reg_save; i < 64; i++)
28578 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28580 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28581 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28582 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28583 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28587 offset += fp_reg_size;
28591 /* If we saved cr, restore it here. Just those that were used. */
28592 if (info->cr_save_p)
28593 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28595 /* If this is V.4, unwind the stack pointer after all of the loads
28596 have been done, or set up r11 if we are restoring fp out of line. */
28598 if (!restoring_FPRs_inline)
28600 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28601 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28602 ptr_regno = ptr_regno_for_savres (sel);
28605 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28606 if (REGNO (frame_reg_rtx) == ptr_regno)
28609 if (insn && restoring_FPRs_inline)
28613 REG_NOTES (insn) = cfa_restores;
28614 cfa_restores = NULL_RTX;
28616 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28617 RTX_FRAME_RELATED_P (insn) = 1;
28620 if (epilogue_type == EPILOGUE_TYPE_EH_RETURN)
28622 rtx sa = EH_RETURN_STACKADJ_RTX;
28623 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28626 if (epilogue_type != EPILOGUE_TYPE_SIBCALL && restoring_FPRs_inline)
28630 /* We can't hang the cfa_restores off a simple return,
28631 since the shrink-wrap code sometimes uses an existing
28632 return. This means there might be a path from
28633 pre-prologue code to this return, and dwarf2cfi code
28634 wants the eh_frame unwinder state to be the same on
28635 all paths to any point. So we need to emit the
28636 cfa_restores before the return. For -m64 we really
28637 don't need epilogue cfa_restores at all, except for
28638 this irritating dwarf2cfi with shrink-wrap
28639 requirement; The stack red-zone means eh_frame info
28640 from the prologue telling the unwinder to restore
28641 from the stack is perfectly good right to the end of
28643 emit_insn (gen_blockage ());
28644 emit_cfa_restores (cfa_restores);
28645 cfa_restores = NULL_RTX;
28648 emit_jump_insn (targetm.gen_simple_return ());
28651 if (epilogue_type != EPILOGUE_TYPE_SIBCALL && !restoring_FPRs_inline)
28653 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28654 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28656 RTVEC_ELT (p, elt++) = ret_rtx;
28658 RTVEC_ELT (p, elt++) = gen_hard_reg_clobber (Pmode, LR_REGNO);
28660 /* We have to restore more than two FP registers, so branch to the
28661 restore function. It will return to our caller. */
28666 if (flag_shrink_wrap)
28667 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28669 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28670 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28671 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28672 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28674 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28676 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28678 RTVEC_ELT (p, elt++)
28679 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28680 if (flag_shrink_wrap
28681 && save_reg_p (info->first_fp_reg_save + i))
28682 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28685 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28690 if (epilogue_type == EPILOGUE_TYPE_SIBCALL)
28691 /* Ensure the cfa_restores are hung off an insn that won't
28692 be reordered above other restores. */
28693 emit_insn (gen_blockage ());
28695 emit_cfa_restores (cfa_restores);
28699 /* Write function epilogue. */
28702 rs6000_output_function_epilogue (FILE *file)
28705 macho_branch_islands ();
28708 rtx_insn *insn = get_last_insn ();
28709 rtx_insn *deleted_debug_label = NULL;
28711 /* Mach-O doesn't support labels at the end of objects, so if
28712 it looks like we might want one, take special action.
28714 First, collect any sequence of deleted debug labels. */
28717 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28719 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28720 notes only, instead set their CODE_LABEL_NUMBER to -1,
28721 otherwise there would be code generation differences
28722 in between -g and -g0. */
28723 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28724 deleted_debug_label = insn;
28725 insn = PREV_INSN (insn);
28728 /* Second, if we have:
28731 then this needs to be detected, so skip past the barrier. */
28733 if (insn && BARRIER_P (insn))
28734 insn = PREV_INSN (insn);
28736 /* Up to now we've only seen notes or barriers. */
28741 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28742 /* Trailing label: <barrier>. */
28743 fputs ("\tnop\n", file);
28746 /* Lastly, see if we have a completely empty function body. */
28747 while (insn && ! INSN_P (insn))
28748 insn = PREV_INSN (insn);
28749 /* If we don't find any insns, we've got an empty function body;
28750 I.e. completely empty - without a return or branch. This is
28751 taken as the case where a function body has been removed
28752 because it contains an inline __builtin_unreachable(). GCC
28753 states that reaching __builtin_unreachable() means UB so we're
28754 not obliged to do anything special; however, we want
28755 non-zero-sized function bodies. To meet this, and help the
28756 user out, let's trap the case. */
28758 fputs ("\ttrap\n", file);
28761 else if (deleted_debug_label)
28762 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28763 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28764 CODE_LABEL_NUMBER (insn) = -1;
28768 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28771 We don't output a traceback table if -finhibit-size-directive was
28772 used. The documentation for -finhibit-size-directive reads
28773 ``don't output a @code{.size} assembler directive, or anything
28774 else that would cause trouble if the function is split in the
28775 middle, and the two halves are placed at locations far apart in
28776 memory.'' The traceback table has this property, since it
28777 includes the offset from the start of the function to the
28778 traceback table itself.
28780 System V.4 Powerpc's (and the embedded ABI derived from it) use a
28781 different traceback table. */
28782 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28783 && ! flag_inhibit_size_directive
28784 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28786 const char *fname = NULL;
28787 const char *language_string = lang_hooks.name;
28788 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28790 int optional_tbtab;
28791 rs6000_stack_t *info = rs6000_stack_info ();
28793 if (rs6000_traceback == traceback_full)
28794 optional_tbtab = 1;
28795 else if (rs6000_traceback == traceback_part)
28796 optional_tbtab = 0;
28798 optional_tbtab = !optimize_size && !TARGET_ELF;
28800 if (optional_tbtab)
28802 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28803 while (*fname == '.') /* V.4 encodes . in the name */
28806 /* Need label immediately before tbtab, so we can compute
28807 its offset from the function start. */
28808 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28809 ASM_OUTPUT_LABEL (file, fname);
28812 /* The .tbtab pseudo-op can only be used for the first eight
28813 expressions, since it can't handle the possibly variable
28814 length fields that follow. However, if you omit the optional
28815 fields, the assembler outputs zeros for all optional fields
28816	 anyway, giving each variable length field its minimum length
28817 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28818 pseudo-op at all. */
28820 /* An all-zero word flags the start of the tbtab, for debuggers
28821 that have to find it by searching forward from the entry
28822 point or from the current pc. */
28823 fputs ("\t.long 0\n", file);
28825 /* Tbtab format type. Use format type 0. */
28826 fputs ("\t.byte 0,", file);
28828 /* Language type. Unfortunately, there does not seem to be any
28829 official way to discover the language being compiled, so we
28830 use language_string.
28831 C is 0. Fortran is 1. Ada is 3. C++ is 9.
28832 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28833 a number, so for now use 9. LTO, Go, D, and JIT aren't assigned
28834 numbers either, so for now use 0. */
28836 || ! strcmp (language_string, "GNU GIMPLE")
28837 || ! strcmp (language_string, "GNU Go")
28838 || ! strcmp (language_string, "GNU D")
28839 || ! strcmp (language_string, "libgccjit"))
28841 else if (! strcmp (language_string, "GNU F77")
28842 || lang_GNU_Fortran ())
28844 else if (! strcmp (language_string, "GNU Ada"))
28846 else if (lang_GNU_CXX ()
28847 || ! strcmp (language_string, "GNU Objective-C++"))
28849 else if (! strcmp (language_string, "GNU Java"))
28851 else if (! strcmp (language_string, "GNU Objective-C"))
28854 gcc_unreachable ();
28855 fprintf (file, "%d,", i);
28857 /* 8 single bit fields: global linkage (not set for C extern linkage,
28858 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28859 from start of procedure stored in tbtab, internal function, function
28860 has controlled storage, function has no toc, function uses fp,
28861 function logs/aborts fp operations. */
28862 /* Assume that fp operations are used if any fp reg must be saved. */
28863 fprintf (file, "%d,",
28864 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
28866 /* 6 bitfields: function is interrupt handler, name present in
28867 proc table, function calls alloca, on condition directives
28868 (controls stack walks, 3 bits), saves condition reg, saves
28870 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28871 set up as a frame pointer, even when there is no alloca call. */
28872 fprintf (file, "%d,",
28873 ((optional_tbtab << 6)
28874 | ((optional_tbtab & frame_pointer_needed) << 5)
28875 | (info->cr_save_p << 1)
28876 | (info->lr_save_p)));
28878 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28880 fprintf (file, "%d,",
28881 (info->push_p << 7) | (64 - info->first_fp_reg_save));
28883 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28884 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28886 if (optional_tbtab)
28888 /* Compute the parameter info from the function decl argument
28891 int next_parm_info_bit = 31;
28893 for (decl = DECL_ARGUMENTS (current_function_decl);
28894 decl; decl = DECL_CHAIN (decl))
28896 rtx parameter = DECL_INCOMING_RTL (decl);
28897 machine_mode mode = GET_MODE (parameter);
28899 if (REG_P (parameter))
28901 if (SCALAR_FLOAT_MODE_P (mode))
28924 gcc_unreachable ();
28927 /* If only one bit will fit, don't or in this entry. */
28928 if (next_parm_info_bit > 0)
28929 parm_info |= (bits << (next_parm_info_bit - 1));
28930 next_parm_info_bit -= 2;
28934 fixed_parms += ((GET_MODE_SIZE (mode)
28935 + (UNITS_PER_WORD - 1))
28937 next_parm_info_bit -= 1;
28943 /* Number of fixed point parameters. */
28944 /* This is actually the number of words of fixed point parameters; thus
28945 an 8 byte struct counts as 2; and thus the maximum value is 8. */
28946 fprintf (file, "%d,", fixed_parms);
28948 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28950 /* This is actually the number of fp registers that hold parameters;
28951 and thus the maximum value is 13. */
28952 /* Set parameters on stack bit if parameters are not in their original
28953 registers, regardless of whether they are on the stack? Xlc
28954 seems to set the bit when not optimizing. */
28955 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28957 if (optional_tbtab)
28959 /* Optional fields follow. Some are variable length. */
28961 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
28962 float, 11 double float. */
28963 /* There is an entry for each parameter in a register, in the order
28964 that they occur in the parameter list. Any intervening arguments
28965 on the stack are ignored. If the list overflows a long (max
28966 possible length 34 bits) then completely leave off all elements
28968 /* Only emit this long if there was at least one parameter. */
28969 if (fixed_parms || float_parms)
28970 fprintf (file, "\t.long %d\n", parm_info);
28972 /* Offset from start of code to tb table. */
28973 fputs ("\t.long ", file);
28974 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28975 RS6000_OUTPUT_BASENAME (file, fname);
28977 rs6000_output_function_entry (file, fname);
28980 /* Interrupt handler mask. */
28981 /* Omit this long, since we never set the interrupt handler bit
28984 /* Number of CTL (controlled storage) anchors. */
28985 /* Omit this long, since the has_ctl bit is never set above. */
28987 /* Displacement into stack of each CTL anchor. */
28988 /* Omit this list of longs, because there are no CTL anchors. */
28990 /* Length of function name. */
28993 fprintf (file, "\t.short %d\n", (int) strlen (fname));
28995 /* Function name. */
28996 assemble_string (fname, strlen (fname));
28998 /* Register for alloca automatic storage; this is always reg 31.
28999 Only emit this if the alloca bit was set above. */
29000 if (frame_pointer_needed)
29001 fputs ("\t.byte 31\n", file);
29003 fputs ("\t.align 2\n", file);
29007 /* Arrange to define .LCTOC1 label, if not already done. */
29011 if (!toc_initialized)
29013 switch_to_section (toc_section);
29014 switch_to_section (current_function_section ());
29019 /* -fsplit-stack support. */
29021 /* A SYMBOL_REF for __morestack. */
29022 static GTY(()) rtx morestack_ref;
/* Emit RT = RA + C using the word-size add pattern.
   NOTE(review): the TARGET_64BIT test and braces sit on elided lines;
   the first return appears to be the 64-bit (DImode) arm, the second
   the 32-bit (SImode) arm — confirm against the full source.  */
29025 gen_add3_const (rtx rt, rtx ra, long c)
29028 return gen_adddi3 (rt, ra, GEN_INT (c));
29030 return gen_addsi3 (rt, ra, GEN_INT (c));
29033 /* Emit -fsplit-stack prologue, which goes before the regular function
29034 prologue (at local entry point in the case of ELFv2). */
/* Emit the -fsplit-stack prologue: compute the prospective stack
   bottom in r12, compare it (unsigned, in CR7) against the split-stack
   limit loaded into r0, and if the frame does not fit, save LR and
   call __morestack before returning via split_stack_return.
   Runs after reload (see the assert below).  */
29037 rs6000_expand_split_stack_prologue (void)
29039 rs6000_stack_t *info = rs6000_stack_info ();
29040 unsigned HOST_WIDE_INT allocate;
29041 long alloc_hi, alloc_lo;
29042 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29045 gcc_assert (flag_split_stack && reload_completed);
/* A user-declared global register r29 conflicts with the split-stack
   implementation; diagnose rather than silently miscompile.  */
29050 if (global_regs[29])
29052 error ("%qs uses register r29", "%<-fsplit-stack%>");
29053 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29054 "conflicts with %qD", global_regs_decl[29]);
29057 allocate = info->total_size;
/* The two-insn addis/addi sequence below cannot express frames
   of 2G or more.  */
29058 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29060 sorry ("Stack frame larger than 2G is not supported for "
29061 "%<-fsplit-stack%>");
/* Lazily create the __morestack symbol, shared by all functions.  */
29064 if (morestack_ref == NULL_RTX)
29066 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29067 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29068 | SYMBOL_FLAG_FUNCTION);
29071 r0 = gen_rtx_REG (Pmode, 0);
29072 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29073 r12 = gen_rtx_REG (Pmode, 12);
29074 emit_insn (gen_load_split_stack_limit (r0));
29075 /* Always emit two insns here to calculate the requested stack,
29076 so that the linker can edit them when adjusting size for calling
29077 non-split-stack code. */
/* Split -allocate into a high part rounded to a signed 16-bit
   multiple of 0x10000 plus a low 16-bit remainder.  */
29078 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29079 alloc_lo = -allocate - alloc_hi;
29082 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29084 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29086 emit_insn (gen_nop ());
29090 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29091 emit_insn (gen_nop ());
/* Unsigned compare of the prospective stack bottom (r12) against
   the limit (r0); branch to ok_label when there is enough room.  */
29094 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29095 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29096 ok_label = gen_label_rtx ();
29097 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29098 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29099 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29101 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29102 JUMP_LABEL (insn) = ok_label;
29103 /* Mark the jump as very likely to be taken. */
29104 add_reg_br_prob_note (insn, profile_probability::very_likely ());
/* Slow path: save LR to the frame, call __morestack, restore LR,
   and return through split_stack_return.  */
29106 lr = gen_rtx_REG (Pmode, LR_REGNO);
29107 insn = emit_move_insn (r0, lr);
29108 RTX_FRAME_RELATED_P (insn) = 1;
29109 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29110 RTX_FRAME_RELATED_P (insn) = 1;
29112 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29113 const0_rtx, const0_rtx));
29114 call_fusage = NULL_RTX;
29115 use_reg (&call_fusage, r12);
29116 /* Say the call uses r0, even though it doesn't, to stop regrename
29117 from twiddling with the insns saving lr, trashing args for cfun.
29118 The insns restoring lr are similarly protected by making
29119 split_stack_return use r0. */
29120 use_reg (&call_fusage, r0);
29121 add_function_usage_to (insn, call_fusage);
29122 /* Indicate that this function can't jump to non-local gotos. */
29123 make_reg_eh_region_note_nothrow_nononlocal (insn);
29124 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29125 insn = emit_move_insn (lr, r0);
29126 add_reg_note (insn, REG_CFA_RESTORE, lr);
29127 RTX_FRAME_RELATED_P (insn) = 1;
29128 emit_insn (gen_split_stack_return ());
29130 emit_label (ok_label);
29131 LABEL_NUSES (ok_label) = 1;
29134 /* Return the internal arg pointer used for function incoming
29135 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29136 to copy it to a pseudo in order for it to be preserved over calls
29137 and suchlike. We'd really like to use a pseudo here for the
29138 internal arg pointer but data-flow analysis is not prepared to
29139 accept pseudos as live at the beginning of a function. */
/* TARGET_INTERNAL_ARG_POINTER hook.  For -fsplit-stack the incoming
   arguments are addressed off r12, which is clobbered by calls, so on
   first use copy r12 into a pseudo at the start of the function and
   base the arg pointer on that pseudo.  Otherwise fall back to the
   default virtual_incoming_args_rtx.  */
29142 rs6000_internal_arg_pointer (void)
29144 if (flag_split_stack
29145 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
/* Lazily create the pseudo the first time the arg pointer is
   requested for this function.  */
29149 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29153 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29154 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29156 /* Put the pseudo initialization right after the note at the
29157 beginning of the function. */
29158 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29159 gen_rtx_REG (Pmode, 12));
29160 push_topmost_sequence ();
29161 emit_insn_after (pat, get_insns ());
29162 pop_topmost_sequence ();
/* Return pseudo + FIRST_PARM_OFFSET, copied into a fresh register
   so the value is stable across later emission.  */
29164 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29165 FIRST_PARM_OFFSET (current_function_decl));
29166 return copy_to_reg (ret);
29168 return virtual_incoming_args_rtx;
29171 /* We may have to tell the dataflow pass that the split stack prologue
29172 is initializing a register. */
/* TARGET_EXTRA_LIVE_ON_ENTRY hook: with -fsplit-stack, r12 carries the
   incoming argument pointer, so tell dataflow it is live on entry.  */
29175 rs6000_live_on_entry (bitmap regs)
29177 if (flag_split_stack)
29178 bitmap_set_bit (regs, 12);
29181 /* Emit -fsplit-stack dynamic stack allocation space check. */
/* Emit the -fsplit-stack check for a dynamic allocation of SIZE bytes:
   compute sp - SIZE, compare it (unsigned) against the split-stack
   limit, and branch to LABEL when there is enough room.  */
29184 rs6000_split_stack_space_check (rtx size, rtx label)
29186 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29187 rtx limit = gen_reg_rtx (Pmode);
29188 rtx requested = gen_reg_rtx (Pmode);
29189 rtx cmp = gen_reg_rtx (CCUNSmode);
29192 emit_insn (gen_load_split_stack_limit (limit));
/* Constant sizes fold into an add of -SIZE; otherwise force SIZE
   into a register and subtract.  */
29193 if (CONST_INT_P (size))
29194 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29197 size = force_reg (Pmode, size);
29198 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29200 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29201 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29202 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29203 gen_rtx_LABEL_REF (VOIDmode, label),
29205 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29206 JUMP_LABEL (jump) = label;
29209 /* A C compound statement that outputs the assembler code for a thunk
29210 function, used to implement C++ virtual function calls with
29211 multiple inheritance. The thunk acts as a wrapper around a virtual
29212 function, adjusting the implicit object parameter before handing
29213 control off to the real function.
29215 First, emit code to add the integer DELTA to the location that
29216 contains the incoming first argument. Assume that this argument
29217 contains a pointer, and is the one used to pass the `this' pointer
29218 in C++. This is the incoming argument *before* the function
29219 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29220 values of all other incoming arguments.
29222 After the addition, emit code to jump to FUNCTION, which is a
29223 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29224 not touch the return address. Hence returning from FUNCTION will
29225 return to whoever called the current `thunk'.
29227 The effect must be as if FUNCTION had been called directly with the
29228 adjusted first argument. This macro is responsible for emitting
29229 all of the code for a thunk function; output_function_prologue()
29230 and output_function_epilogue() are not invoked.
29232 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29233 been extracted from it.) It might possibly be useful on some
29234 targets, but probably not.
29236 If you do not define this macro, the target-independent code in the
29237 C++ frontend will generate a less efficient heavyweight thunk that
29238 calls FUNCTION instead of jumping to it. The generic approach does
29239 not support varargs. */
/* TARGET_ASM_OUTPUT_MI_THUNK: emit the assembly for a C++
   multiple-inheritance thunk.  Adjusts the `this' pointer by DELTA
   (and, if nonzero, by a vtable-loaded VCALL_OFFSET), then tail-calls
   FUNCTION.  The RTL is generated directly and run through final()
   here, bypassing the normal pass pipeline.  */
29242 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29243 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29246 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
29247 rtx this_rtx, funexp;
/* Pretend reload/epilogue have run so the emitted RTL is accepted
   by final(); restored to 0 at the end of the function.  */
29250 reload_completed = 1;
29251 epilogue_completed = 1;
29253 /* Mark the end of the (empty) prologue. */
29254 emit_note (NOTE_INSN_PROLOGUE_END);
29256 /* Find the "this" pointer. If the function returns a structure,
29257 the structure return pointer is in r3. */
29258 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29259 this_rtx = gen_rtx_REG (Pmode, 4);
29261 this_rtx = gen_rtx_REG (Pmode, 3);
29263 /* Apply the constant offset, if required. */
29265 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29267 /* Apply the offset from the vtable, if required. */
29270 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29271 rtx tmp = gen_rtx_REG (Pmode, 12);
/* tmp = vtable pointer loaded from *this.  */
29273 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
/* Offsets outside the signed 16-bit displacement range need an
   explicit add before the load; small ones use reg+const addressing.  */
29274 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29276 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29277 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29281 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29283 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29285 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29288 /* Generate a tail call to the target function. */
29289 if (!TREE_USED (function))
29291 assemble_external (function);
29292 TREE_USED (function) = 1;
29294 funexp = XEXP (DECL_RTL (function), 0);
29295 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29298 if (MACHOPIC_INDIRECT)
29299 funexp = machopic_indirect_call_target (funexp);
29302 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29303 generate sibcall RTL explicitly. */
29304 insn = emit_call_insn (
29305 gen_rtx_PARALLEL (VOIDmode,
29307 gen_rtx_CALL (VOIDmode,
29308 funexp, const0_rtx),
29309 gen_rtx_USE (VOIDmode, const0_rtx),
29310 simple_return_rtx)));
29311 SIBLING_CALL_P (insn) = 1;
29314 /* Run just enough of rest_of_compilation to get the insns emitted.
29315 There's not really enough bulk here to make other passes such as
29316 instruction scheduling worth while. Note that use_thunk calls
29317 assemble_start_function and assemble_end_function. */
29318 insn = get_insns ();
29319 shorten_branches (insn);
29320 assemble_start_function (thunk_fndecl, fnname);
29321 final_start_function (insn, file, 1);
29322 final (insn, file, 1);
29323 final_end_function ();
29324 assemble_end_function (thunk_fndecl, fnname);
/* Undo the reload/epilogue pretense set up at the top.  */
29326 reload_completed = 0;
29327 epilogue_completed = 0;
29330 /* A quick summary of the various types of 'constant-pool tables'
29333 Target Flags Name One table per
29334 AIX (none) AIX TOC object file
29335 AIX -mfull-toc AIX TOC object file
29336 AIX -mminimal-toc AIX minimal TOC translation unit
29337 SVR4/EABI (none) SVR4 SDATA object file
29338 SVR4/EABI -fpic SVR4 pic object file
29339 SVR4/EABI -fPIC SVR4 PIC translation unit
29340 SVR4/EABI -mrelocatable EABI TOC function
29341 SVR4/EABI -maix AIX TOC object file
29342 SVR4/EABI -maix -mminimal-toc
29343 AIX minimal TOC translation unit
29345 Name Reg. Set by entries contains:
29346 made by addrs? fp? sum?
29348 AIX TOC 2 crt0 as Y option option
29349 AIX minimal TOC 30 prolog gcc Y Y option
29350 SVR4 SDATA 13 crt0 gcc N Y N
29351 SVR4 pic 30 prolog ld Y not yet N
29352 SVR4 PIC 30 prolog gcc Y option option
29353 EABI TOC 30 prolog gcc Y option option
29357 /* Hash functions for the hash table. */
/* Compute a hash value for TOC constant K by structurally walking the
   rtx: the code and mode seed the hash, then each operand is folded in
   according to its RTX format character (strings, sub-rtxes, ints,
   wide ints).  Used by toc_hasher below.  */
29360 rs6000_hash_constant (rtx k)
29362 enum rtx_code code = GET_CODE (k);
29363 machine_mode mode = GET_MODE (k);
29364 unsigned result = (code << 3) ^ mode;
29365 const char *format;
29368 format = GET_RTX_FORMAT (code);
29369 flen = strlen (format);
/* Special cases before the generic format walk (the enclosing
   switch's header falls on an elided line).  */
29375 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29377 case CONST_WIDE_INT:
29380 flen = CONST_WIDE_INT_NUNITS (k);
29381 for (i = 0; i < flen; i++)
29382 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29387 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
/* Generic walk: fold each operand per its format character.  */
29397 for (; fidx < flen; fidx++)
29398 switch (format[fidx])
29403 const char *str = XSTR (k, fidx);
29404 len = strlen (str);
29405 result = result * 613 + len;
29406 for (i = 0; i < len; i++)
29407 result = result * 613 + (unsigned) str[i];
/* Sub-expression: recurse.  */
29412 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29416 result = result * 613 + (unsigned) XINT (k, fidx);
/* HOST_WIDE_INT operand: hash whole when it fits in unsigned,
   otherwise fold it in unsigned-sized chunks.  */
29419 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29420 result = result * 613 + (unsigned) XWINT (k, fidx);
29424 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29425 result = result * 613 + (unsigned) (XWINT (k, fidx)
29432 gcc_unreachable ();
/* Hash a TOC table entry: combine the structural hash of the key rtx
   with the mode it will be emitted in.  */
29439 toc_hasher::hash (toc_hash_struct *thc)
29441 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29444 /* Compare H1 and H2 for equivalence. */
29447 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29452 if (h1->key_mode != h2->key_mode)
29455 return rtx_equal_p (r1, r2);
29458 /* These are the names given by the C++ front-end to vtables, and
29459 vtable-like objects. Ideally, this logic should not be here;
29460 instead, there should be some programmatic way of inquiring as
29461 to whether or not an object is a vtable. */
29463 #define VTABLE_NAME_P(NAME) \
29464 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29465 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29466 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29467 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29468 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
29470 #ifdef NO_DOLLAR_IN_LABEL
29471 /* Return a GGC-allocated character string translating dollar signs in
29472 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
/* Return a GGC-allocated copy of NAME with each '$' (except a leading
   one) replaced by '_'; returns NAME unchanged when there is nothing
   to strip.  Used by XCOFF ASM_OUTPUT_LABELREF.  */
29475 rs6000_xcoff_strip_dollar (const char *name)
29481 q = (const char *) strchr (name, '$');
/* No '$', or only a leading '$': nothing to rewrite.  */
29483 if (q == 0 || q == name)
29486 len = strlen (name);
/* Work on a stack copy; only the final result is GGC-allocated.  */
29487 strip = XALLOCAVEC (char, len + 1);
29488 strcpy (strip, name);
29489 p = strip + (q - name);
29493 p = strchr (p + 1, '$');
29496 return ggc_alloc_string (strip, len);
/* Output SYMBOL_REF X to FILE.  Vtable symbols are emitted by their
   identifier (see comment below); everything else goes through the
   normal assemble_name path.  */
29501 rs6000_output_symbol_ref (FILE *file, rtx x)
29503 const char *name = XSTR (x, 0);
29505 /* Currently C++ toc references to vtables can be emitted before it
29506 is decided whether the vtable is public or private. If this is
29507 the case, then the linker will eventually complain that there is
29508 a reference to an unknown section. Thus, for vtables only,
29509 we emit the TOC reference to reference the identifier and not the
29511 if (VTABLE_NAME_P (name))
29513 RS6000_OUTPUT_BASENAME (file, name);
29516 assemble_name (file, name);
29519 /* Output a TOC entry. We derive the entry name from what is being
/* Emit TOC entry number LABELNO for constant X in mode MODE to FILE.
   Duplicate entries are folded by aliasing the new label to the first
   occurrence via the toc_hash_table; otherwise the constant is written
   out with a format depending on its kind (128/64/32-bit FP, integer,
   or symbolic address, each with XCOFF ".tc" and ELF/minimal-TOC
   variants).  */
29523 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29526 const char *name = buf;
29528 HOST_WIDE_INT offset = 0;
29530 gcc_assert (!TARGET_NO_TOC);
29532 /* When the linker won't eliminate them, don't output duplicate
29533 TOC entries (this happens on AIX if there is any kind of TOC,
29534 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29536 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29538 struct toc_hash_struct *h;
29540 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29541 time because GGC is not initialized at that point. */
29542 if (toc_hash_table == NULL)
29543 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29545 h = ggc_alloc<toc_hash_struct> ();
29547 h->key_mode = mode;
29548 h->labelno = labelno;
29550 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29551 if (*found == NULL)
29553 else /* This is indeed a duplicate.
29554 Set this label equal to that label. */
/* Alias .LC<labelno> to the label of the first occurrence and
   return without emitting the constant again.  */
29556 fputs ("\t.set ", file);
29557 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29558 fprintf (file, "%d,", labelno);
29559 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29560 fprintf (file, "%d\n", ((*found)->labelno));
/* For XCOFF TLS GD/LD entries, also alias the companion .LCM label
   used for the module handle.  */
29563 if (TARGET_XCOFF && SYMBOL_REF_P (x)
29564 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29565 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29567 fputs ("\t.set ", file);
29568 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29569 fprintf (file, "%d,", labelno);
29570 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29571 fprintf (file, "%d\n", ((*found)->labelno));
29578 /* If we're going to put a double constant in the TOC, make sure it's
29579 aligned properly when strict alignment is on. */
29580 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29581 && STRICT_ALIGNMENT
29582 && GET_MODE_BITSIZE (mode) >= 64
29583 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29584 ASM_OUTPUT_ALIGN (file, 3);
29587 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29589 /* Handle FP constants specially. Note that if we have a minimal
29590 TOC, things we put here aren't actually in the TOC, so we can allow
/* 128-bit FP constants (IBM double-double, IEEE 128, decimal128).  */
29592 if (CONST_DOUBLE_P (x)
29593 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29594 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29598 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29599 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29601 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
/* 64-bit words variant (first branch) vs 32-bit longs variant;
   the guarding condition lines are elided here.  */
29605 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29606 fputs (DOUBLE_INT_ASM_OP, file);
29608 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29609 k[0] & 0xffffffff, k[1] & 0xffffffff,
29610 k[2] & 0xffffffff, k[3] & 0xffffffff);
29611 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29612 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29613 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29614 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29615 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29620 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29621 fputs ("\t.long ", file);
29623 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29624 k[0] & 0xffffffff, k[1] & 0xffffffff,
29625 k[2] & 0xffffffff, k[3] & 0xffffffff);
29626 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29627 k[0] & 0xffffffff, k[1] & 0xffffffff,
29628 k[2] & 0xffffffff, k[3] & 0xffffffff);
/* 64-bit FP constants (binary or decimal double).  */
29632 else if (CONST_DOUBLE_P (x)
29633 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29637 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29638 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29640 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29644 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29645 fputs (DOUBLE_INT_ASM_OP, file);
29647 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29648 k[0] & 0xffffffff, k[1] & 0xffffffff);
29649 fprintf (file, "0x%lx%08lx\n",
29650 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29651 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29656 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29657 fputs ("\t.long ", file);
29659 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29660 k[0] & 0xffffffff, k[1] & 0xffffffff);
29661 fprintf (file, "0x%lx,0x%lx\n",
29662 k[0] & 0xffffffff, k[1] & 0xffffffff);
/* 32-bit FP constants (binary or decimal single).  */
29666 else if (CONST_DOUBLE_P (x)
29667 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29671 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29672 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29674 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29678 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29679 fputs (DOUBLE_INT_ASM_OP, file);
29681 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29682 if (WORDS_BIG_ENDIAN)
29683 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29685 fprintf (file, "0x%lx\n", l & 0xffffffff);
29690 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29691 fputs ("\t.long ", file);
29693 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29694 fprintf (file, "0x%lx\n", l & 0xffffffff);
/* Plain integer constants.  */
29698 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
29700 unsigned HOST_WIDE_INT low;
29701 HOST_WIDE_INT high;
29703 low = INTVAL (x) & 0xffffffff;
29704 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29706 /* TOC entries are always Pmode-sized, so when big-endian
29707 smaller integer constants in the TOC need to be padded.
29708 (This is still a win over putting the constants in
29709 a separate constant pool, because then we'd have
29710 to have both a TOC entry _and_ the actual constant.)
29712 For a 32-bit target, CONST_INT values are loaded and shifted
29713 entirely within `low' and can be stored in one TOC entry. */
29715 /* It would be easy to make this work, but it doesn't now. */
29716 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29718 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29721 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29722 high = (HOST_WIDE_INT) low >> 32;
29728 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29729 fputs (DOUBLE_INT_ASM_OP, file);
29731 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29732 (long) high & 0xffffffff, (long) low & 0xffffffff);
29733 fprintf (file, "0x%lx%08lx\n",
29734 (long) high & 0xffffffff, (long) low & 0xffffffff);
29739 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29741 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29742 fputs ("\t.long ", file);
29744 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29745 (long) high & 0xffffffff, (long) low & 0xffffffff);
29746 fprintf (file, "0x%lx,0x%lx\n",
29747 (long) high & 0xffffffff, (long) low & 0xffffffff);
29751 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29752 fputs ("\t.long ", file);
29754 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29755 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
/* Symbolic addresses: strip a CONST wrapper of (symbol + offset),
   then derive the entry name from the base.  */
29761 if (GET_CODE (x) == CONST)
29763 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29764 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
29766 base = XEXP (XEXP (x, 0), 0);
29767 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29770 switch (GET_CODE (base))
29773 name = XSTR (base, 0);
29777 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29778 CODE_LABEL_NUMBER (XEXP (base, 0)));
29782 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29786 gcc_unreachable ();
29789 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29790 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29793 fputs ("\t.tc ", file);
29794 RS6000_OUTPUT_BASENAME (file, name);
/* Encode the offset sign into the entry name: .N for negative,
   .P for positive.  */
29797 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29799 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29801 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29802 after other TOC symbols, reducing overflow of small TOC access
29803 to [TC] symbols. */
29804 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29805 ? "[TE]," : "[TC],", file);
29808 /* Currently C++ toc references to vtables can be emitted before it
29809 is decided whether the vtable is public or private. If this is
29810 the case, then the linker will eventually complain that there is
29811 a TOC reference to an unknown section. Thus, for vtables only,
29812 we emit the TOC reference to reference the symbol and not the
29814 if (VTABLE_NAME_P (name))
29816 RS6000_OUTPUT_BASENAME (file, name);
29818 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29819 else if (offset > 0)
29820 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29823 output_addr_const (file, x);
/* XCOFF TLS symbols get a model suffix; GD/LD also emit a second
   .LCM entry holding the module handle.  */
29826 if (TARGET_XCOFF && SYMBOL_REF_P (base))
29828 switch (SYMBOL_REF_TLS_MODEL (base))
29832 case TLS_MODEL_LOCAL_EXEC:
29833 fputs ("@le", file);
29835 case TLS_MODEL_INITIAL_EXEC:
29836 fputs ("@ie", file);
29838 /* Use global-dynamic for local-dynamic. */
29839 case TLS_MODEL_GLOBAL_DYNAMIC:
29840 case TLS_MODEL_LOCAL_DYNAMIC:
29842 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29843 fputs ("\t.tc .", file);
29844 RS6000_OUTPUT_BASENAME (file, name);
29845 fputs ("[TC],", file);
29846 output_addr_const (file, x);
29847 fputs ("@m", file);
29850 gcc_unreachable ();
29858 /* Output an assembler pseudo-op to write an ASCII string of N characters
29859 starting at P to FILE.
29861 On the RS/6000, we have to do this using the .byte operation and
29862 write out special characters outside the quoted string.
29863 Also, the assembler is broken; very long strings are truncated,
29864 so we must artificially break them up early. */
/* Write the N bytes at P to FILE as assembler data, using quoted
   ".byte \"...\"" runs for printable characters and numeric ".byte"
   items for everything else.  Long runs are broken up because the
   assembler truncates very long strings (see comment above in the
   file).  */
29867 output_ascii (FILE *file, const char *p, int n)
29870 int i, count_string;
/* The for_* prefixes encode the state machine: what to print before
   the next quoted character vs. the next numeric byte; to_close is
   the pending close-quote/newline.  */
29871 const char *for_string = "\t.byte \"";
29872 const char *for_decimal = "\t.byte ";
29873 const char *to_close = NULL;
29876 for (i = 0; i < n; i++)
/* Printable ASCII goes inside a quoted string.  */
29879 if (c >= ' ' && c < 0177)
29882 fputs (for_string, file);
29885 /* Write two quotes to get one. */
29893 for_decimal = "\"\n\t.byte ";
/* Break the quoted run before it gets long enough to be
   truncated by the assembler.  */
29897 if (count_string >= 512)
29899 fputs (to_close, file);
29901 for_string = "\t.byte \"";
29902 for_decimal = "\t.byte ";
/* Non-printable byte: emit as a decimal .byte item.  */
29910 fputs (for_decimal, file);
29911 fprintf (file, "%d", c);
29913 for_string = "\n\t.byte \"";
29914 for_decimal = ", ";
29920 /* Now close the string if we have written one. Then end the line. */
29922 fputs (to_close, file);
29925 /* Generate a unique section name for FILENAME for a section type
29926 represented by SECTION_DESC. Output goes into BUF.
29928 SECTION_DESC can be any string, as long as it is different for each
29929 possible section type.
29931 We name the section in the same manner as xlc. The name begins with an
29932 underscore followed by the filename (after stripping any leading directory
29933 names) with the last period replaced by the string SECTION_DESC. If
29934 FILENAME does not contain a period, SECTION_DESC is appended to the end of
/* Build an xlc-style section name for FILENAME and SECTION_DESC into a
   freshly xmalloc'd string stored through BUF: strip leading
   directories, replace the last '.' with SECTION_DESC (or append it
   when there is no '.'), and drop non-alphanumeric characters.  The
   caller owns the returned buffer.  */
29938 rs6000_gen_section_name (char **buf, const char *filename,
29939 const char *section_desc)
29941 const char *q, *after_last_slash, *last_period = 0;
/* Locate the basename start and the last '.' in one scan.  */
29945 after_last_slash = filename;
29946 for (q = filename; *q; q++)
29949 after_last_slash = q + 1;
29950 else if (*q == '.')
29954 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29955 *buf = (char *) xmalloc (len);
29960 for (q = after_last_slash; *q; q++)
/* At the last period, substitute SECTION_DESC.  */
29962 if (q == last_period)
29964 strcpy (p, section_desc);
29965 p += strlen (section_desc);
/* Copy only alphanumeric characters; everything else is dropped.  */
29969 else if (ISALNUM (*q))
/* No period in the basename: append SECTION_DESC at the end.  */
29973 if (last_period == 0)
29974 strcpy (p, section_desc);
29979 /* Emit profile function. */
/* Emit the RTL call to the profiling routine (_mcount) for the current
   function, per ABI: nothing extra for kernel profiling, a label-name
   argument under AIX/ELFv2 when counters are used, and a caller-address
   argument under Darwin.  */
29982 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
29984 /* Non-standard profiling for kernels, which just saves LR then calls
29985 _mcount without worrying about arg saves. The idea is to change
29986 the function prologue as little as possible as it isn't easy to
29987 account for arg save/restore code added just for _mcount. */
29988 if (TARGET_PROFILE_KERNEL)
29991 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29993 #ifndef NO_PROFILE_COUNTERS
29994 # define NO_PROFILE_COUNTERS 0
/* With no counters, _mcount takes no argument ...  */
29996 if (NO_PROFILE_COUNTERS)
29997 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29998 LCT_NORMAL, VOIDmode);
/* ... otherwise pass the address of this function's counter label.  */
30002 const char *label_name;
30005 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30006 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30007 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30009 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30010 LCT_NORMAL, VOIDmode, fun, Pmode);
30013 else if (DEFAULT_ABI == ABI_DARWIN)
30015 const char *mcount_name = RS6000_MCOUNT;
30016 int caller_addr_regno = LR_REGNO;
30018 /* Be conservative and always set this, at least for now. */
30019 crtl->uses_pic_offset_table = 1;
30022 /* For PIC code, set up a stub and collect the caller's address
30023 from r0, which is where the prologue puts it. */
30024 if (MACHOPIC_INDIRECT
30025 && crtl->uses_pic_offset_table)
30026 caller_addr_regno = 0;
30028 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30029 LCT_NORMAL, VOIDmode,
30030 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30034 /* Write function profiler code. */
/* Write the textual assembly that calls _mcount at function entry for
   the V4 ABI (the AIX/ELFv2/Darwin ABIs are handled in
   output_profile_hook instead).  The sequence saves LR, and under the
   counter scheme materializes the address of counter label .LP<n>
   according to the PIC flavor in use.  */
30037 output_function_profiler (FILE *file, int labelno)
30041 switch (DEFAULT_ABI)
30044 gcc_unreachable ();
/* Profiling of 64-bit V4 code is not implemented.  */
30049 warning (0, "no profiling of 64-bit code for this ABI");
30052 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30053 fprintf (file, "\tmflr %s\n", reg_names[0]);
/* No counters: just save LR at 4(r1).  */
30054 if (NO_PROFILE_COUNTERS)
30056 asm_fprintf (file, "\tstw %s,4(%s)\n",
30057 reg_names[0], reg_names[1]);
/* Secure-PLT PIC: compute the counter address relative to a
   pc-derived base (bcl/mflr, or the link-stack thunk).  */
30059 else if (TARGET_SECURE_PLT && flag_pic)
30061 if (TARGET_LINK_STACK)
30064 get_ppc476_thunk_name (name);
30065 asm_fprintf (file, "\tbl %s\n", name);
30068 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30069 asm_fprintf (file, "\tstw %s,4(%s)\n",
30070 reg_names[0], reg_names[1]);
30071 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30072 asm_fprintf (file, "\taddis %s,%s,",
30073 reg_names[12], reg_names[12]);
30074 assemble_name (file, buf);
30075 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30076 assemble_name (file, buf);
30077 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
/* -fpic: load the counter address from the GOT.  */
30079 else if (flag_pic == 1)
30081 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30082 asm_fprintf (file, "\tstw %s,4(%s)\n",
30083 reg_names[0], reg_names[1]);
30084 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30085 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30086 assemble_name (file, buf);
30087 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
/* -fPIC: embed the pc-relative offset as inline data and add it
   to the address of the data word.  */
30089 else if (flag_pic > 1)
30091 asm_fprintf (file, "\tstw %s,4(%s)\n",
30092 reg_names[0], reg_names[1]);
30093 /* Now, we need to get the address of the label. */
30094 if (TARGET_LINK_STACK)
30097 get_ppc476_thunk_name (name);
30098 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30099 assemble_name (file, buf);
30100 fputs ("-.\n1:", file);
30101 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30102 asm_fprintf (file, "\taddi %s,%s,4\n",
30103 reg_names[11], reg_names[11]);
30107 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30108 assemble_name (file, buf);
30109 fputs ("-.\n1:", file);
30110 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30112 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30113 reg_names[0], reg_names[11]);
30114 asm_fprintf (file, "\tadd %s,%s,%s\n",
30115 reg_names[0], reg_names[0], reg_names[11]);
/* Non-PIC: materialize the counter address with lis/la.  */
30119 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30120 assemble_name (file, buf);
30121 fputs ("@ha\n", file);
30122 asm_fprintf (file, "\tstw %s,4(%s)\n",
30123 reg_names[0], reg_names[1]);
30124 asm_fprintf (file, "\tla %s,", reg_names[0]);
30125 assemble_name (file, buf);
30126 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30129 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30130 fprintf (file, "\tbl %s%s\n",
30131 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30137 /* Don't do anything, done in output_profile_hook (). */
30137 /* Don't do anything, done in output_profile_hook (). */
30144 /* The following variable value is the last issued insn. */
30146 static rtx_insn *last_scheduled_insn;
30148 /* The following variable helps to balance issuing of load and
30149 store instructions */
30151 static int load_store_pendulum;
30153 /* The following variable helps pair divide insns during scheduling. */
30154 static int divide_cnt;
30155 /* The following variable helps pair and alternate vector and vector load
30156 insns during scheduling. */
30157 static int vec_pairing;
30160 /* Power4 load update and store update instructions are cracked into a
30161 load or store and an integer insn which are executed in the same cycle.
30162 Branches have their own dispatch slot which does not count against the
30163 GCC issue rate, but it changes the program flow so there are no other
30164 instructions to issue in this cycle. */
/* Worker for the TARGET_SCHED_VARIABLE_ISSUE hook: given that INSN was
   just issued with MORE issue slots remaining, return how many slots
   remain, caching the result in cached_can_issue_more.  */
30167 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30169 last_scheduled_insn = insn;
/* USE/CLOBBER markers consume no issue slot.  */
30170 if (GET_CODE (PATTERN (insn)) == USE
30171 || GET_CODE (PATTERN (insn)) == CLOBBER)
30173 cached_can_issue_more = more;
30174 return cached_can_issue_more;
/* An insn that ends the current dispatch group exhausts the cycle.  */
30177 if (insn_terminates_group_p (insn, current_group))
30179 cached_can_issue_more = 0;
30180 return cached_can_issue_more;
30183 /* If no reservation, but reach here */
30184 if (recog_memoized (insn) < 0)
/* Group-based (Power4-style) dispatch: microcoded insns fill the
   group, cracked insns take two slots, others take one.  */
30187 if (rs6000_sched_groups)
30189 if (is_microcoded_insn (insn))
30190 cached_can_issue_more = 0;
30191 else if (is_cracked_insn (insn))
30192 cached_can_issue_more = more > 2 ? more - 2 : 0;
30194 cached_can_issue_more = more - 1;
30196 return cached_can_issue_more;
/* Cell: non-pipelined insns still consume one slot here.  */
30199 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
30202 cached_can_issue_more = more - 1;
30203 return cached_can_issue_more;
/* TARGET_SCHED_VARIABLE_ISSUE hook: delegate to the worker above and,
   when scheduler dumping is enabled, trace the result to STREAM.  */
30207 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30209 int r = rs6000_variable_issue_1 (insn, more);
30211 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30215 /* Adjust the cost of a scheduling dependency. Return the new cost of
30216 a dependency LINK or INSN on DEP_INSN. COST is the current cost.
   Implements TARGET_SCHED_ADJUST_COST; DEP_TYPE is REG_DEP_TRUE /
   REG_DEP_OUTPUT / REG_DEP_ANTI.  */
30219 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30222 enum attr_type attr_type;
/* Unrecognizable insns get no cost adjustment.  */
30224 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30231 /* Data dependency; DEP_INSN writes a register that INSN reads
30232 some cycles later. */
30234 /* Separate a load from a narrower, dependent store. */
/* Load (SET src is a MEM) reading a location a narrower store (SET dest
   is a MEM) just wrote: keep them apart on group-based machines and
   POWER9 to avoid load-hit-store penalties.  */
30235 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
30236 && GET_CODE (PATTERN (insn)) == SET
30237 && GET_CODE (PATTERN (dep_insn)) == SET
30238 && MEM_P (XEXP (PATTERN (insn), 1))
30239 && MEM_P (XEXP (PATTERN (dep_insn), 0))
30240 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30241 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30244 attr_type = get_attr_type (insn);
30249 /* Tell the first scheduling pass about the latency between
30250 a mtctr and bctr (and mtlr and br/blr). The first
30251 scheduling pass will not know about this latency since
30252 the mtctr instruction, which has the latency associated
30253 to it, will be generated by reload. */
30256 /* Leave some extra cycles between a compare and its
30257 dependent branch, to inhibit expensive mispredicts. */
30258 if ((rs6000_tune == PROCESSOR_PPC603
30259 || rs6000_tune == PROCESSOR_PPC604
30260 || rs6000_tune == PROCESSOR_PPC604e
30261 || rs6000_tune == PROCESSOR_PPC620
30262 || rs6000_tune == PROCESSOR_PPC630
30263 || rs6000_tune == PROCESSOR_PPC750
30264 || rs6000_tune == PROCESSOR_PPC7400
30265 || rs6000_tune == PROCESSOR_PPC7450
30266 || rs6000_tune == PROCESSOR_PPCE5500
30267 || rs6000_tune == PROCESSOR_PPCE6500
30268 || rs6000_tune == PROCESSOR_POWER4
30269 || rs6000_tune == PROCESSOR_POWER5
30270 || rs6000_tune == PROCESSOR_POWER7
30271 || rs6000_tune == PROCESSOR_POWER8
30272 || rs6000_tune == PROCESSOR_POWER9
30273 || rs6000_tune == PROCESSOR_CELL)
30274 && recog_memoized (dep_insn)
30275 && (INSN_CODE (dep_insn) >= 0))
30277 switch (get_attr_type (dep_insn))
30280 case TYPE_FPCOMPARE:
30281 case TYPE_CR_LOGICAL:
30285 if (get_attr_dot (dep_insn) == DOT_YES)
30290 if (get_attr_dot (dep_insn) == DOT_YES
30291 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
/* Power6-specific bypass costs for a fixed-point result feeding the
   store-data operand of a store.  */
30302 if ((rs6000_tune == PROCESSOR_POWER6)
30303 && recog_memoized (dep_insn)
30304 && (INSN_CODE (dep_insn) >= 0))
30307 if (GET_CODE (PATTERN (insn)) != SET)
30308 /* If this happens, we have to extend this to schedule
30309 optimally. Return default for now. */
30312 /* Adjust the cost for the case where the value written
30313 by a fixed point operation is used as the address
30314 gen value on a store. */
30315 switch (get_attr_type (dep_insn))
30320 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30321 return get_attr_sign_extend (dep_insn)
30322 == SIGN_EXTEND_YES ? 6 : 4;
30327 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30328 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30338 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30346 if (get_attr_update (dep_insn) == UPDATE_YES
30347 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30353 if (! rs6000_store_data_bypass_p (dep_insn, insn))
/* Divides are especially slow producers; latency depends on width.  */
30359 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30360 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
/* Power6: fixed-point result used in the address-generation portion of a
   dependent load.  */
30370 if ((rs6000_tune == PROCESSOR_POWER6)
30371 && recog_memoized (dep_insn)
30372 && (INSN_CODE (dep_insn) >= 0))
30375 /* Adjust the cost for the case where the value written
30376 by a fixed point instruction is used within the address
30377 gen portion of a subsequent load(u)(x) */
30378 switch (get_attr_type (dep_insn))
30383 if (set_to_load_agen (dep_insn, insn))
30384 return get_attr_sign_extend (dep_insn)
30385 == SIGN_EXTEND_YES ? 6 : 4;
30390 if (set_to_load_agen (dep_insn, insn))
30391 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30401 if (set_to_load_agen (dep_insn, insn))
30409 if (get_attr_update (dep_insn) == UPDATE_YES
30410 && set_to_load_agen (dep_insn, insn))
30416 if (set_to_load_agen (dep_insn, insn))
30422 if (set_to_load_agen (dep_insn, insn))
30423 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30433 if ((rs6000_tune == PROCESSOR_POWER6)
30434 && get_attr_update (insn) == UPDATE_NO
30435 && recog_memoized (dep_insn)
30436 && (INSN_CODE (dep_insn) >= 0)
30437 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30444 /* Fall out to return default cost. */
30448 case REG_DEP_OUTPUT:
30449 /* Output dependency; DEP_INSN writes a register that INSN writes some
30451 if ((rs6000_tune == PROCESSOR_POWER6)
30452 && recog_memoized (dep_insn)
30453 && (INSN_CODE (dep_insn) >= 0))
30455 attr_type = get_attr_type (insn);
30460 case TYPE_FPSIMPLE:
30461 if (get_attr_type (dep_insn) == TYPE_FP
30462 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30466 if (get_attr_update (insn) == UPDATE_NO
30467 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30474 /* Fall through, no cost for output dependency. */
30478 /* Anti dependency; DEP_INSN reads a register that INSN writes some
/* All dep_type values are handled above; anything else is a bug.  */
30483 gcc_unreachable ();
30489 /* Debug version of rs6000_adjust_cost.  Computes the adjusted cost,
   prints the dependence kind plus final/original cost to the debug
   stream, and returns the adjusted cost unchanged.  */
30492 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30493 int cost, unsigned int dw)
30495 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
/* Map DEP_TYPE to a human-readable label.  (Fixed the misspelling
   "depencency" -> "dependency" in the emitted debug text.)  */
30503 default: dep = "unknown dependency"; break;
30504 case REG_DEP_TRUE: dep = "data dependency"; break;
30505 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30506 case REG_DEP_ANTI: dep = "anti dependency"; break;
30510 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30511 "%s, insn:\n", ret, cost, dep);
30519 /* The function returns a true if INSN is microcoded.
30520 Return false otherwise.
   Microcoded insns occupy an entire dispatch group on Power4/5 and are
   CELL_MICRO_ALWAYS on Cell; the scheduler treats them as group
   terminators (see rs6000_variable_issue_1).  */
30523 is_microcoded_insn (rtx_insn *insn)
/* Pseudo-insns (USE/CLOBBER) and non-insns are never microcoded.  */
30525 if (!insn || !NONDEBUG_INSN_P (insn)
30526 || GET_CODE (PATTERN (insn)) == USE
30527 || GET_CODE (PATTERN (insn)) == CLOBBER)
30530 if (rs6000_tune == PROCESSOR_CELL)
30531 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30533 if (rs6000_sched_groups
30534 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30536 enum attr_type type = get_attr_type (insn);
/* Load-with-update-and-sign-extend, indexed load/store with update, and
   mfcr are microcoded on Power4/5.  */
30537 if ((type == TYPE_LOAD
30538 && get_attr_update (insn) == UPDATE_YES
30539 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30540 || ((type == TYPE_LOAD || type == TYPE_STORE)
30541 && get_attr_update (insn) == UPDATE_YES
30542 && get_attr_indexed (insn) == INDEXED_YES)
30543 || type == TYPE_MFCR)
30550 /* The function returns true if INSN is cracked into 2 instructions
30551 by the processor (and therefore occupies 2 issue slots).
   Only meaningful for the group-based Power4/Power5 schedulers.  */
30554 is_cracked_insn (rtx_insn *insn)
30556 if (!insn || !NONDEBUG_INSN_P (insn)
30557 || GET_CODE (PATTERN (insn)) == USE
30558 || GET_CODE (PATTERN (insn)) == CLOBBER)
30561 if (rs6000_sched_groups
30562 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30564 enum attr_type type = get_attr_type (insn);
/* The enumeration below lists the load/store/CR-logical/record-form
   shapes that Power4/5 crack into two internal ops.  */
30565 if ((type == TYPE_LOAD
30566 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30567 && get_attr_update (insn) == UPDATE_NO)
30568 || (type == TYPE_LOAD
30569 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30570 && get_attr_update (insn) == UPDATE_YES
30571 && get_attr_indexed (insn) == INDEXED_NO)
30572 || (type == TYPE_STORE
30573 && get_attr_update (insn) == UPDATE_YES
30574 && get_attr_indexed (insn) == INDEXED_NO)
30575 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30576 && get_attr_update (insn) == UPDATE_YES)
30577 || (type == TYPE_CR_LOGICAL
30578 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
30579 || (type == TYPE_EXTS
30580 && get_attr_dot (insn) == DOT_YES)
30581 || (type == TYPE_SHIFT
30582 && get_attr_dot (insn) == DOT_YES
30583 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30584 || (type == TYPE_MUL
30585 && get_attr_dot (insn) == DOT_YES)
30586 || type == TYPE_DIV
30587 || (type == TYPE_INSERT
30588 && get_attr_size (insn) == SIZE_32))
30595 /* The function returns true if INSN can be issued only from
30596 the branch slot.
   On group-based machines, branches (TYPE_BRANCH / TYPE_JMPREG) dispatch
   only from the dedicated branch slot of a group.  */
30599 is_branch_slot_insn (rtx_insn *insn)
30601 if (!insn || !NONDEBUG_INSN_P (insn)
30602 || GET_CODE (PATTERN (insn)) == USE
30603 || GET_CODE (PATTERN (insn)) == CLOBBER)
30606 if (rs6000_sched_groups)
30608 enum attr_type type = get_attr_type (insn);
30609 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30617 /* The function returns true if out_inst sets a value that is
30618 used in the address generation computation of in_insn.
   Used by rs6000_adjust_cost to price fixed-point -> load AGEN bypasses
   on Power6.  */
30620 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30622 rtx out_set, in_set;
30624 /* For performance reasons, only handle the simple case where
30625 both loads are a single_set. */
30626 out_set = single_set (out_insn);
/* NOTE(review): single_set may return NULL; the early-return guards are
   presumably on lines elided from this excerpt -- confirm upstream.  */
30629 in_set = single_set (in_insn);
30631 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30637 /* Try to determine base/offset/size parts of the given MEM.
30638 Return true if successful, false if all the values couldn't
   be determined.
30641 This function only looks for REG or REG+CONST address forms.
30642 REG+REG address form will return false.  */
30645 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30646 HOST_WIDE_INT *size)
/* NOTE(review): the condition below appears to be missing its enclosing
   parentheses ("if (MEM_SIZE_KNOWN_P (mem))") -- likely an extraction
   artifact; confirm against the upstream source.  */
30649 if MEM_SIZE_KNOWN_P (mem)
30650 *size = MEM_SIZE (mem);
30654 addr_rtx = (XEXP (mem, 0));
/* For pre-modify addresses, the effective address is operand 1.  */
30655 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30656 addr_rtx = XEXP (addr_rtx, 1);
/* Accumulate constant displacements until a bare base remains.  */
30659 while (GET_CODE (addr_rtx) == PLUS
30660 && CONST_INT_P (XEXP (addr_rtx, 1)))
30662 *offset += INTVAL (XEXP (addr_rtx, 1));
30663 addr_rtx = XEXP (addr_rtx, 0);
/* REG+REG (or anything else non-REG) is rejected.  */
30665 if (!REG_P (addr_rtx))
30672 /* Return true if the target storage location of MEM1 is adjacent to
30673    that of MEM2, i.e. one access ends exactly where the other begins.
30674    Only REG / REG+CONST addresses are handled (get_memref_parts);
   anything else yields false.  Used for Power6 store pairing.  */
30677 adjacent_mem_locations (rtx mem1, rtx mem2)
30680 HOST_WIDE_INT off1, size1, off2, size2;
/* Fixed mis-encoded characters: "&reg1"/"&reg2" had been corrupted into
   the HTML-entity residue "®1"/"®2".  */
30682 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30683 && get_memref_parts (mem2, &reg2, &off2, &size2))
30684 return ((REGNO (reg1) == REGNO (reg2))
30685 && ((off1 + size1 == off2)
30686 || (off2 + size2 == off1)));
30691 /* This function returns true if it can be determined that the two MEM
30692    locations overlap by at least 1 byte based on base reg/offset/size.
   Both addresses must decompose via get_memref_parts and share the same
   base register; otherwise no overlap can be proven and false results.  */
30695 mem_locations_overlap (rtx mem1, rtx mem2)
30698 HOST_WIDE_INT off1, size1, off2, size2;
/* Fixed mis-encoded characters: "&reg1"/"&reg2" had been corrupted into
   the HTML-entity residue "®1"/"®2".  */
30700 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30701 && get_memref_parts (mem2, &reg2, &off2, &size2))
30702 return ((REGNO (reg1) == REGNO (reg2))
30703 && (((off1 <= off2) && (off1 + size1 > off2))
30704 || ((off2 <= off1) && (off2 + size2 > off1))));
30709 /* A C statement (sans semicolon) to update the integer scheduling
30710 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30711 INSN earlier, reduce the priority to execute INSN later. Do not
30712 define this macro if you do not need to adjust the scheduling
30713 priorities of insns.
   Implements TARGET_SCHED_ADJUST_PRIORITY; returns the (possibly
   modified) priority.  */
30716 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30718 rtx load_mem, str_mem;
30719 /* On machines (like the 750) which have asymmetric integer units,
30720 where one integer unit can do multiply and divides and the other
30721 can't, reduce the priority of multiply/divide so it is scheduled
30722 before other integer operations. */
30725 if (! INSN_P (insn))
30728 if (GET_CODE (PATTERN (insn)) == USE)
30731 switch (rs6000_tune) {
30732 case PROCESSOR_PPC750:
30733 switch (get_attr_type (insn))
/* Debug trace of the pre-adjustment priority (%#x and decimal).  */
30740 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30741 priority, priority);
30742 if (priority >= 0 && priority < 0x01000000)
/* Dispatch-group machines: boost insns that must start a group, per the
   -msched-restricted-insns-priority policy.  */
30749 if (insn_must_be_first_in_group (insn)
30750 && reload_completed
30751 && current_sched_info->sched_max_insns_priority
30752 && rs6000_sched_restricted_insns_priority)
30755 /* Prioritize insns that can be dispatched only in the first
30757 if (rs6000_sched_restricted_insns_priority == 1)
30758 /* Attach highest priority to insn. This means that in
30759 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30760 precede 'priority' (critical path) considerations. */
30761 return current_sched_info->sched_max_insns_priority;
30762 else if (rs6000_sched_restricted_insns_priority == 2)
30763 /* Increase priority of insn by a minimal amount. This means that in
30764 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30765 considerations precede dispatch-slot restriction considerations. */
30766 return (priority + 1);
30769 if (rs6000_tune == PROCESSOR_POWER6
30770 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30771 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30772 /* Attach highest priority to insn if the scheduler has just issued two
30773 stores and this instruction is a load, or two loads and this instruction
30774 is a store. Power6 wants loads and stores scheduled alternately
30776 return current_sched_info->sched_max_insns_priority;
30781 /* Return true if the instruction is nonpipelined on the Cell.
   Non-pipelined insns (multiplies, divides, square roots, SPR moves)
   block their unit; the Cell scheduling heuristics keep them apart.  */
30783 is_nonpipeline_insn (rtx_insn *insn)
30785 enum attr_type type;
30786 if (!insn || !NONDEBUG_INSN_P (insn)
30787 || GET_CODE (PATTERN (insn)) == USE
30788 || GET_CODE (PATTERN (insn)) == CLOBBER)
30791 type = get_attr_type (insn);
30792 if (type == TYPE_MUL
30793 || type == TYPE_DIV
30794 || type == TYPE_SDIV
30795 || type == TYPE_DDIV
30796 || type == TYPE_SSQRT
30797 || type == TYPE_DSQRT
30798 || type == TYPE_MFCR
30799 || type == TYPE_MFCRF
30800 || type == TYPE_MFJMPR)
30808 /* Return how many instructions the machine can issue per cycle.
   Implements TARGET_SCHED_ISSUE_RATE; grouped by processor family, with
   the per-group return values on lines between the case runs.  */
30811 rs6000_issue_rate (void)
30813 /* Unless scheduling for register pressure, use issue rate of 1 for
30814 first scheduling pass to decrease degradation. */
30815 if (!reload_completed && !flag_sched_pressure)
30818 switch (rs6000_tune) {
30819 case PROCESSOR_RS64A:
30820 case PROCESSOR_PPC601: /* ? */
30821 case PROCESSOR_PPC7450:
30823 case PROCESSOR_PPC440:
30824 case PROCESSOR_PPC603:
30825 case PROCESSOR_PPC750:
30826 case PROCESSOR_PPC7400:
30827 case PROCESSOR_PPC8540:
30828 case PROCESSOR_PPC8548:
30829 case PROCESSOR_CELL:
30830 case PROCESSOR_PPCE300C2:
30831 case PROCESSOR_PPCE300C3:
30832 case PROCESSOR_PPCE500MC:
30833 case PROCESSOR_PPCE500MC64:
30834 case PROCESSOR_PPCE5500:
30835 case PROCESSOR_PPCE6500:
30836 case PROCESSOR_TITAN:
30838 case PROCESSOR_PPC476:
30839 case PROCESSOR_PPC604:
30840 case PROCESSOR_PPC604e:
30841 case PROCESSOR_PPC620:
30842 case PROCESSOR_PPC630:
30844 case PROCESSOR_POWER4:
30845 case PROCESSOR_POWER5:
30846 case PROCESSOR_POWER6:
30847 case PROCESSOR_POWER7:
30849 case PROCESSOR_POWER8:
30851 case PROCESSOR_POWER9:
30858 /* Return how many instructions to look ahead for better insn
   scheduling (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD).  */
30862 rs6000_use_sched_lookahead (void)
30864 switch (rs6000_tune)
30866 case PROCESSOR_PPC8540:
30867 case PROCESSOR_PPC8548:
/* Cell looks ahead 8 insns, but only after reload.  */
30870 case PROCESSOR_CELL:
30871 return (reload_completed ? 8 : 0);
30878 /* We are choosing insn from the ready queue. Return zero if INSN can be
   chosen (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD).
   Only Cell filters candidates; everything else always accepts.  */
30881 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30883 if (ready_index == 0)
30886 if (rs6000_tune != PROCESSOR_CELL)
30889 gcc_assert (insn != NULL_RTX && INSN_P (insn));
/* Reject non-pipelined/microcoded insns as lookahead candidates before
   reload on Cell.  */
30891 if (!reload_completed
30892 || is_nonpipeline_insn (insn)
30893 || is_microcoded_insn (insn))
30899 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30900 and return true.
   Recurses through all operands and vectors of PAT.  */
30903 find_mem_ref (rtx pat, rtx *mem_ref)
30908 /* stack_tie does not produce any real memory traffic. */
30909 if (tie_operand (pat, VOIDmode))
30918 /* Recursively process the pattern. */
30919 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30921 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
/* 'e' operands are single rtx children; 'E' operands are vectors.  */
30925 if (find_mem_ref (XEXP (pat, i), mem_ref))
30928 else if (fmt[i] == 'E')
30929 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30931 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30939 /* Determine if PAT is a PATTERN of a load insn.  On success *LOAD_MEM
   receives the MEM read.  Handles SET and PARALLEL patterns only.  */
30942 is_load_insn1 (rtx pat, rtx *load_mem)
30944 if (!pat || pat == NULL_RTX)
/* A load reads memory on the source side of a SET.  */
30947 if (GET_CODE (pat) == SET)
30948 return find_mem_ref (SET_SRC (pat), load_mem);
30950 if (GET_CODE (pat) == PARALLEL)
30954 for (i = 0; i < XVECLEN (pat, 0); i++)
30955 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30962 /* Determine if INSN loads from memory; if so *LOAD_MEM is set to the
   MEM read.  Wrapper around is_load_insn1 on the insn's PATTERN.  */
30965 is_load_insn (rtx insn, rtx *load_mem)
30967 if (!insn || !INSN_P (insn))
30973 return is_load_insn1 (PATTERN (insn), load_mem);
30976 /* Determine if PAT is a PATTERN of a store insn.  On success *STR_MEM
   receives the MEM written.  Handles SET and PARALLEL patterns only.  */
30979 is_store_insn1 (rtx pat, rtx *str_mem)
30981 if (!pat || pat == NULL_RTX)
/* A store writes memory on the destination side of a SET.  */
30984 if (GET_CODE (pat) == SET)
30985 return find_mem_ref (SET_DEST (pat), str_mem);
30987 if (GET_CODE (pat) == PARALLEL)
30991 for (i = 0; i < XVECLEN (pat, 0); i++)
30992 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
30999 /* Determine if INSN stores to memory; if so *STR_MEM is set to the MEM
   written.  Wrapper around is_store_insn1 on the insn's PATTERN.  */
31002 is_store_insn (rtx insn, rtx *str_mem)
31004 if (!insn || !INSN_P (insn))
31007 return is_store_insn1 (PATTERN (insn), str_mem);
31010 /* Return whether TYPE is a Power9 pairable vector instruction type.
   Pairable vector types dispatch to the same superslice as a vecload --
   used by power9_sched_reorder2's pairing state machine.  */
31013 is_power9_pairable_vec_type (enum attr_type type)
31017 case TYPE_VECSIMPLE:
31018 case TYPE_VECCOMPLEX:
31022 case TYPE_VECFLOAT:
31024 case TYPE_VECDOUBLE:
31032 /* Returns whether the dependence between INSN and NEXT is considered
31033 costly by the given target.
   Implements TARGET_SCHED_IS_COSTLY_DEPENDENCE; policy is selected by
   -msched-costly-dep (rs6000_sched_costly_dep).  */
31036 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31040 rtx load_mem, str_mem;
31042 /* If the flag is not enabled - no dependence is considered costly;
31043 allow all dependent insns in the same group.
31044 This is the most aggressive option. */
31045 if (rs6000_sched_costly_dep == no_dep_costly)
31048 /* If the flag is set to 1 - a dependence is always considered costly;
31049 do not allow dependent instructions in the same group.
31050 This is the most conservative option. */
31051 if (rs6000_sched_costly_dep == all_deps_costly)
/* PRO is the producer (earlier insn), CON the consumer.  */
31054 insn = DEP_PRO (dep);
31055 next = DEP_CON (dep);
31057 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31058 && is_load_insn (next, &load_mem)
31059 && is_store_insn (insn, &str_mem))
31060 /* Prevent load after store in the same group. */
31063 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31064 && is_load_insn (next, &load_mem)
31065 && is_store_insn (insn, &str_mem)
31066 && DEP_TYPE (dep) == REG_DEP_TRUE
31067 && mem_locations_overlap(str_mem, load_mem))
31068 /* Prevent load after store in the same group if it is a true
   dependence on overlapping locations.  */
31072 /* The flag is set to X; dependences with latency >= X are considered costly,
31073 and will not be scheduled in the same group. */
31074 if (rs6000_sched_costly_dep <= max_dep_latency
31075 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31081 /* Return the next insn after INSN that is found before TAIL is reached,
31082 skipping any "non-active" insns - insns that will not actually occupy
31083 an issue slot. Return NULL_RTX if such an insn is not found. */
31086 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31088 if (insn == NULL_RTX || insn == tail)
/* Walk forward until an insn that occupies an issue slot is found:
   calls/jumps/jump tables, or a recognized non-USE, non-CLOBBER,
   non-stack_tie NONJUMP insn.  */
31093 insn = NEXT_INSN (insn);
31094 if (insn == NULL_RTX || insn == tail)
31098 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31099 || (NONJUMP_INSN_P (insn)
31100 && GET_CODE (PATTERN (insn)) != USE
31101 && GET_CODE (PATTERN (insn)) != CLOBBER
31102 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31108 /* Do Power9 specific sched_reorder2 reordering of ready list.
   READY is the ready list, LASTPOS its last index.  Pairs fixed-point
   divides back-to-back and interleaves vector insns with vector loads;
   always returns cached_can_issue_more.  */
31111 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31116 enum attr_type type, type2;
31118 type = get_attr_type (last_scheduled_insn);
31120 /* Try to issue fixed point divides back-to-back in pairs so they will be
31121 routed to separate execution units and execute in parallel. */
31122 if (type == TYPE_DIV && divide_cnt == 0)
31124 /* First divide has been scheduled. */
31127 /* Scan the ready list looking for another divide, if found move it
31128 to the end of the list so it is chosen next. */
31132 if (recog_memoized (ready[pos]) >= 0
31133 && get_attr_type (ready[pos]) == TYPE_DIV)
/* Rotate the divide to the end of the ready list (the scheduler
   picks from the end).  */
31136 for (i = pos; i < lastpos; i++)
31137 ready[i] = ready[i + 1];
31138 ready[lastpos] = tmp;
31146 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31149 /* The best dispatch throughput for vector and vector load insns can be
31150 achieved by interleaving a vector and vector load such that they'll
31151 dispatch to the same superslice. If this pairing cannot be achieved
31152 then it is best to pair vector insns together and vector load insns
   together.
31155 To aid in this pairing, vec_pairing maintains the current state with
31156 the following values:
31158 0 : Initial state, no vecload/vector pairing has been started.
31160 1 : A vecload or vector insn has been issued and a candidate for
31161 pairing has been found and moved to the end of the ready
   list.  */
31163 if (type == TYPE_VECLOAD)
31165 /* Issued a vecload. */
31166 if (vec_pairing == 0)
31168 int vecload_pos = -1;
31169 /* We issued a single vecload, look for a vector insn to pair it
31170 with. If one isn't found, try to pair another vecload. */
31174 if (recog_memoized (ready[pos]) >= 0)
31176 type2 = get_attr_type (ready[pos]);
31177 if (is_power9_pairable_vec_type (type2))
31179 /* Found a vector insn to pair with, move it to the
31180 end of the ready list so it is scheduled next. */
31182 for (i = pos; i < lastpos; i++)
31183 ready[i] = ready[i + 1];
31184 ready[lastpos] = tmp;
31186 return cached_can_issue_more;
31188 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31189 /* Remember position of first vecload seen. */
31194 if (vecload_pos >= 0)
31196 /* Didn't find a vector to pair with but did find a vecload,
31197 move it to the end of the ready list. */
31198 tmp = ready[vecload_pos];
31199 for (i = vecload_pos; i < lastpos; i++)
31200 ready[i] = ready[i + 1];
31201 ready[lastpos] = tmp;
31203 return cached_can_issue_more;
31207 else if (is_power9_pairable_vec_type (type))
31209 /* Issued a vector operation. */
31210 if (vec_pairing == 0)
31213 /* We issued a single vector insn, look for a vecload to pair it
31214 with. If one isn't found, try to pair another vector. */
31218 if (recog_memoized (ready[pos]) >= 0)
31220 type2 = get_attr_type (ready[pos]);
31221 if (type2 == TYPE_VECLOAD)
31223 /* Found a vecload insn to pair with, move it to the
31224 end of the ready list so it is scheduled next. */
31226 for (i = pos; i < lastpos; i++)
31227 ready[i] = ready[i + 1];
31228 ready[lastpos] = tmp;
31230 return cached_can_issue_more;
31232 else if (is_power9_pairable_vec_type (type2)
31234 /* Remember position of first vector insn seen. */
31241 /* Didn't find a vecload to pair with but did find a vector
31242 insn, move it to the end of the ready list. */
31243 tmp = ready[vec_pos];
31244 for (i = vec_pos; i < lastpos; i++)
31245 ready[i] = ready[i + 1];
31246 ready[lastpos] = tmp;
31248 return cached_can_issue_more;
31253 /* We've either finished a vec/vecload pair, couldn't find an insn to
31254 continue the current pair, or the last insn had nothing to do
31255 with pairing. In any case, reset the state. */
31259 return cached_can_issue_more;
31262 /* We are about to begin issuing insns for this clock cycle.
   Implements TARGET_SCHED_REORDER; returns the issue rate for the new
   cycle.  */
31265 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31266 rtx_insn **ready ATTRIBUTE_UNUSED,
31267 int *pn_ready ATTRIBUTE_UNUSED,
31268 int clock_var ATTRIBUTE_UNUSED)
31270 int n_ready = *pn_ready;
31273 fprintf (dump, "// rs6000_sched_reorder :\n");
31275 /* Reorder the ready list, if the second to last ready insn
31276 is a non-pipelined insn. */
31277 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
31279 if (is_nonpipeline_insn (ready[n_ready - 1])
31280 && (recog_memoized (ready[n_ready - 2]) > 0))
31281 /* Simply swap first two insns. */
31282 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
/* Power6 starts each cycle with a balanced load/store pendulum (see
   rs6000_sched_reorder2).  */
31285 if (rs6000_tune == PROCESSOR_POWER6)
31286 load_store_pendulum = 0;
31288 return rs6000_issue_rate ();
31291 /* Like rs6000_sched_reorder, but called after issuing each insn.
   Implements TARGET_SCHED_REORDER2; returns cached_can_issue_more.  */
31294 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31295 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31298 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31300 /* For Power6, we need to handle some special cases to try and keep the
31301 store queue from overflowing and triggering expensive flushes.
31303 This code monitors how load and store instructions are being issued
31304 and skews the ready list one way or the other to increase the likelihood
31305 that a desired instruction is issued at the proper time.
31307 A couple of things are done. First, we maintain a "load_store_pendulum"
31308 to track the current state of load/store issue.
31310 - If the pendulum is at zero, then no loads or stores have been
31311 issued in the current cycle so we do nothing.
31313 - If the pendulum is 1, then a single load has been issued in this
31314 cycle and we attempt to locate another load in the ready list to
   issue with it.
31317 - If the pendulum is -2, then two stores have already been
31318 issued in this cycle, so we increase the priority of the first load
31319 in the ready list to increase its likelihood of being chosen first
   in the next cycle.
31322 - If the pendulum is -1, then a single store has been issued in this
31323 cycle and we attempt to locate another store in the ready list to
31324 issue with it, preferring a store to an adjacent memory location to
31325 facilitate store pairing in the store queue.
31327 - If the pendulum is 2, then two loads have already been
31328 issued in this cycle, so we increase the priority of the first store
31329 in the ready list to increase its likelihood of being chosen first
   in the next cycle.
31332 - If the pendulum < -2 or > 2, then do nothing.
31334 Note: This code covers the most common scenarios. There exist non
31335 load/store instructions which make use of the LSU and which
31336 would need to be accounted for to strictly model the behavior
31337 of the machine. Those instructions are currently unaccounted
31338 for to help minimize compile time overhead of this code.
   */
31340 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
31345 rtx load_mem, str_mem;
31347 if (is_store_insn (last_scheduled_insn, &str_mem))
31348 /* Issuing a store, swing the load_store_pendulum to the left */
31349 load_store_pendulum--;
31350 else if (is_load_insn (last_scheduled_insn, &load_mem))
31351 /* Issuing a load, swing the load_store_pendulum to the right */
31352 load_store_pendulum++;
31354 return cached_can_issue_more;
31356 /* If the pendulum is balanced, or there is only one instruction on
31357 the ready list, then all is well, so return. */
31358 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31359 return cached_can_issue_more;
31361 if (load_store_pendulum == 1)
31363 /* A load has been issued in this cycle. Scan the ready list
31364 for another load to issue with it */
31369 if (is_load_insn (ready[pos], &load_mem))
31371 /* Found a load. Move it to the head of the ready list,
31372 and adjust its priority so that it is more likely to
   stay there.  */
31375 for (i=pos; i<*pn_ready-1; i++)
31376 ready[i] = ready[i + 1];
31377 ready[*pn_ready-1] = tmp;
31379 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31380 INSN_PRIORITY (tmp)++;
31386 else if (load_store_pendulum == -2)
31388 /* Two stores have been issued in this cycle. Increase the
31389 priority of the first load in the ready list to favor it for
31390 issuing in the next cycle. */
31395 if (is_load_insn (ready[pos], &load_mem)
31397 && INSN_PRIORITY_KNOWN (ready[pos]))
31399 INSN_PRIORITY (ready[pos])++;
31401 /* Adjust the pendulum to account for the fact that a load
31402 was found and increased in priority. This is to prevent
31403 increasing the priority of multiple loads */
31404 load_store_pendulum--;
31411 else if (load_store_pendulum == -1)
31413 /* A store has been issued in this cycle. Scan the ready list for
31414 another store to issue with it, preferring a store to an adjacent
   memory location.  */
31416 int first_store_pos = -1;
31422 if (is_store_insn (ready[pos], &str_mem))
31425 /* Maintain the index of the first store found on the
   ready list.  */
31427 if (first_store_pos == -1)
31428 first_store_pos = pos;
31430 if (is_store_insn (last_scheduled_insn, &str_mem2)
31431 && adjacent_mem_locations (str_mem, str_mem2))
31433 /* Found an adjacent store. Move it to the head of the
31434 ready list, and adjust its priority so that it is
31435 more likely to stay there */
31437 for (i=pos; i<*pn_ready-1; i++)
31438 ready[i] = ready[i + 1];
31439 ready[*pn_ready-1] = tmp;
31441 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31442 INSN_PRIORITY (tmp)++;
31444 first_store_pos = -1;
31452 if (first_store_pos >= 0)
31454 /* An adjacent store wasn't found, but a non-adjacent store was,
31455 so move the non-adjacent store to the front of the ready
31456 list, and adjust its priority so that it is more likely to
   stay there.  */
31458 tmp = ready[first_store_pos];
31459 for (i=first_store_pos; i<*pn_ready-1; i++)
31460 ready[i] = ready[i + 1];
31461 ready[*pn_ready-1] = tmp;
31462 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31463 INSN_PRIORITY (tmp)++;
31466 else if (load_store_pendulum == 2)
31468 /* Two loads have been issued in this cycle. Increase the priority
31469 of the first store in the ready list to favor it for issuing in
   the next cycle.  */
31475 if (is_store_insn (ready[pos], &str_mem)
31477 && INSN_PRIORITY_KNOWN (ready[pos]))
31479 INSN_PRIORITY (ready[pos])++;
31481 /* Adjust the pendulum to account for the fact that a store
31482 was found and increased in priority. This is to prevent
31483 increasing the priority of multiple stores */
31484 load_store_pendulum++;
31493 /* Do Power9 dependent reordering if necessary. */
31494 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31495 && recog_memoized (last_scheduled_insn) >= 0)
31496 return power9_sched_reorder2 (ready, *pn_ready - 1);
31498 return cached_can_issue_more;
31501 /* Return whether the presence of INSN causes a dispatch group termination
31502 of group WHICH_GROUP.
31504 If WHICH_GROUP == current_group, this function will return true if INSN
31505 causes the termination of the current group (i.e, the dispatch group to
31506 which INSN belongs). This means that INSN will be the last insn in the
31507 group it belongs to.
31509 If WHICH_GROUP == previous_group, this function will return true if INSN
31510 causes the termination of the previous group (i.e, the dispatch group that
31511 precedes the group to which INSN belongs). This means that INSN will be
31512 the first insn in the group it belongs to). */
31515 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
/* first/last classify INSN via the two helpers below; the selected one
   determines the answer for the requested group.  */
31522 first = insn_must_be_first_in_group (insn);
31523 last = insn_must_be_last_in_group (insn);
31528 if (which_group == current_group)
31530 else if (which_group == previous_group)
/* Return true if INSN must be the first insn of a dispatch group on the
   current tuning target (Power4-Power8 have per-processor rules keyed
   on insn attribute type and modifiers).  */
31538 insn_must_be_first_in_group (rtx_insn *insn)
31540 enum attr_type type;
/* Non-insns and pseudo patterns never constrain group placement.  */
31544 || DEBUG_INSN_P (insn)
31545 || GET_CODE (PATTERN (insn)) == USE
31546 || GET_CODE (PATTERN (insn)) == CLOBBER)
31549 switch (rs6000_tune)
31551 case PROCESSOR_POWER5:
31552 if (is_cracked_insn (insn))
/* NOTE(review): POWER5 appears to fall through into the POWER4 checks
   below -- confirm against the elided lines.  */
31555 case PROCESSOR_POWER4:
31556 if (is_microcoded_insn (insn))
31559 if (!rs6000_sched_groups)
31562 type = get_attr_type (insn);
31569 case TYPE_CR_LOGICAL:
31582 case PROCESSOR_POWER6:
31583 type = get_attr_type (insn);
31592 case TYPE_FPCOMPARE:
31603 if (get_attr_dot (insn) == DOT_NO
31604 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31609 if (get_attr_size (insn) == SIZE_32)
31617 if (get_attr_update (insn) == UPDATE_YES)
31625 case PROCESSOR_POWER7:
31626 type = get_attr_type (insn);
31630 case TYPE_CR_LOGICAL:
31644 if (get_attr_dot (insn) == DOT_YES)
31649 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31650 || get_attr_update (insn) == UPDATE_YES)
31657 if (get_attr_update (insn) == UPDATE_YES)
31665 case PROCESSOR_POWER8:
31666 type = get_attr_type (insn);
31670 case TYPE_CR_LOGICAL:
31678 case TYPE_VECSTORE:
31685 if (get_attr_dot (insn) == DOT_YES)
31690 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31691 || get_attr_update (insn) == UPDATE_YES)
31696 if (get_attr_update (insn) == UPDATE_YES
31697 && get_attr_indexed (insn) == INDEXED_YES)
/* Return true if INSN must be the last insn of a dispatch group on the
   current tuning target; companion to insn_must_be_first_in_group.  */
31713 insn_must_be_last_in_group (rtx_insn *insn)
31715 enum attr_type type;
/* Non-insns and pseudo patterns never constrain group placement.  */
31719 || DEBUG_INSN_P (insn)
31720 || GET_CODE (PATTERN (insn)) == USE
31721 || GET_CODE (PATTERN (insn)) == CLOBBER)
31724 switch (rs6000_tune) {
31725 case PROCESSOR_POWER4:
31726 case PROCESSOR_POWER5:
31727 if (is_microcoded_insn (insn))
31730 if (is_branch_slot_insn (insn))
31734 case PROCESSOR_POWER6:
31735 type = get_attr_type (insn);
31743 case TYPE_FPCOMPARE:
31754 if (get_attr_dot (insn) == DOT_NO
31755 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31760 if (get_attr_size (insn) == SIZE_32)
31768 case PROCESSOR_POWER7:
31769 type = get_attr_type (insn);
31779 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31780 && get_attr_update (insn) == UPDATE_YES)
31785 if (get_attr_update (insn) == UPDATE_YES
31786 && get_attr_indexed (insn) == INDEXED_YES)
31794 case PROCESSOR_POWER8:
31795 type = get_attr_type (insn);
31807 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31808 && get_attr_update (insn) == UPDATE_YES)
31813 if (get_attr_update (insn) == UPDATE_YES
31814 && get_attr_indexed (insn) == INDEXED_YES)
31829 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31830 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31833 is_costly_group (rtx *group_insns, rtx next_insn)
31836 int issue_rate = rs6000_issue_rate ();
31838 for (i = 0; i < issue_rate; i++)
31840 sd_iterator_def sd_it;
31842 rtx insn = group_insns[i];
31847 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31849 rtx next = DEP_CON (dep);
31851 if (next == next_insn
31852 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31860 /* Utility of the function redefine_groups.
31861 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31862 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31863 to keep it "far" (in a separate group) from GROUP_INSNS, following
31864 one of the following schemes, depending on the value of the flag
31865 -minsert-sched-nops = X:
31866 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31867 in order to force NEXT_INSN into a separate group.
31868 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31869 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31870 insertion (has a group just ended, how many vacant issue slots remain in the
31871 last group, and how many dispatch groups were encountered so far). */
31874 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31875 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31880 int issue_rate = rs6000_issue_rate ();
31881 bool end = *group_end;
31884 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31885 return can_issue_more;
31887 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31888 return can_issue_more;
31890 force = is_costly_group (group_insns, next_insn);
31892 return can_issue_more;
31894 if (sched_verbose > 6)
31895 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
31896 *group_count ,can_issue_more);
31898 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31901 can_issue_more = 0;
31903 /* Since only a branch can be issued in the last issue_slot, it is
31904 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31905 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31906 in this case the last nop will start a new group and the branch
31907 will be forced to the new group. */
31908 if (can_issue_more && !is_branch_slot_insn (next_insn))
31911 /* Do we have a special group ending nop? */
31912 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
31913 || rs6000_tune == PROCESSOR_POWER8)
31915 nop = gen_group_ending_nop ();
31916 emit_insn_before (nop, next_insn);
31917 can_issue_more = 0;
31920 while (can_issue_more > 0)
31923 emit_insn_before (nop, next_insn);
31931 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31933 int n_nops = rs6000_sched_insert_nops;
31935 /* Nops can't be issued from the branch slot, so the effective
31936 issue_rate for nops is 'issue_rate - 1'. */
31937 if (can_issue_more == 0)
31938 can_issue_more = issue_rate;
31940 if (can_issue_more == 0)
31942 can_issue_more = issue_rate - 1;
31945 for (i = 0; i < issue_rate; i++)
31947 group_insns[i] = 0;
31954 emit_insn_before (nop, next_insn);
31955 if (can_issue_more == issue_rate - 1) /* new group begins */
31958 if (can_issue_more == 0)
31960 can_issue_more = issue_rate - 1;
31963 for (i = 0; i < issue_rate; i++)
31965 group_insns[i] = 0;
31971 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31974 /* Is next_insn going to start a new group? */
31977 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31978 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31979 || (can_issue_more < issue_rate &&
31980 insn_terminates_group_p (next_insn, previous_group)));
31981 if (*group_end && end)
31984 if (sched_verbose > 6)
31985 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
31986 *group_count, can_issue_more);
31987 return can_issue_more;
31990 return can_issue_more;
31993 /* This function tries to synch the dispatch groups that the compiler "sees"
31994 with the dispatch groups that the processor dispatcher is expected to
31995 form in practice. It tries to achieve this synchronization by forcing the
31996 estimated processor grouping on the compiler (as opposed to the function
31997 'pad_groups' which tries to force the scheduler's grouping on the processor).
31999 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32000 examines the (estimated) dispatch groups that will be formed by the processor
32001 dispatcher. It marks these group boundaries to reflect the estimated
32002 processor grouping, overriding the grouping that the scheduler had marked.
32003 Depending on the value of the flag '-minsert-sched-nops' this function can
32004 force certain insns into separate groups or force a certain distance between
32005 them by inserting nops, for example, if there exists a "costly dependence"
32008 The function estimates the group boundaries that the processor will form as
32009 follows: It keeps track of how many vacant issue slots are available after
32010 each insn. A subsequent insn will start a new group if one of the following
32012 - no more vacant issue slots remain in the current dispatch group.
32013 - only the last issue slot, which is the branch slot, is vacant, but the next
32014 insn is not a branch.
32015 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32016 which means that a cracked insn (which occupies two issue slots) can't be
32017 issued in this group.
32018 - less than 'issue_rate' slots are vacant, and the next insn always needs to
32019 start a new group. */
32022 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32025 rtx_insn *insn, *next_insn;
32027 int can_issue_more;
32030 int group_count = 0;
32034 issue_rate = rs6000_issue_rate ();
32035 group_insns = XALLOCAVEC (rtx, issue_rate);
32036 for (i = 0; i < issue_rate; i++)
32038 group_insns[i] = 0;
32040 can_issue_more = issue_rate;
32042 insn = get_next_active_insn (prev_head_insn, tail);
32045 while (insn != NULL_RTX)
32047 slot = (issue_rate - can_issue_more);
32048 group_insns[slot] = insn;
32050 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32051 if (insn_terminates_group_p (insn, current_group))
32052 can_issue_more = 0;
32054 next_insn = get_next_active_insn (insn, tail);
32055 if (next_insn == NULL_RTX)
32056 return group_count + 1;
32058 /* Is next_insn going to start a new group? */
32060 = (can_issue_more == 0
32061 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32062 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32063 || (can_issue_more < issue_rate &&
32064 insn_terminates_group_p (next_insn, previous_group)));
32066 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32067 next_insn, &group_end, can_issue_more,
32073 can_issue_more = 0;
32074 for (i = 0; i < issue_rate; i++)
32076 group_insns[i] = 0;
32080 if (GET_MODE (next_insn) == TImode && can_issue_more)
32081 PUT_MODE (next_insn, VOIDmode);
32082 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32083 PUT_MODE (next_insn, TImode);
32086 if (can_issue_more == 0)
32087 can_issue_more = issue_rate;
32090 return group_count;
32093 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32094 dispatch group boundaries that the scheduler had marked. Pad with nops
32095 any dispatch groups which have vacant issue slots, in order to force the
32096 scheduler's grouping on the processor dispatcher. The function
32097 returns the number of dispatch groups found. */
32100 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32103 rtx_insn *insn, *next_insn;
32106 int can_issue_more;
32108 int group_count = 0;
32110 /* Initialize issue_rate. */
32111 issue_rate = rs6000_issue_rate ();
32112 can_issue_more = issue_rate;
32114 insn = get_next_active_insn (prev_head_insn, tail);
32115 next_insn = get_next_active_insn (insn, tail);
32117 while (insn != NULL_RTX)
32120 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32122 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32124 if (next_insn == NULL_RTX)
32129 /* If the scheduler had marked group termination at this location
32130 (between insn and next_insn), and neither insn nor next_insn will
32131 force group termination, pad the group with nops to force group
32134 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32135 && !insn_terminates_group_p (insn, current_group)
32136 && !insn_terminates_group_p (next_insn, previous_group))
32138 if (!is_branch_slot_insn (next_insn))
32141 while (can_issue_more)
32144 emit_insn_before (nop, next_insn);
32149 can_issue_more = issue_rate;
32154 next_insn = get_next_active_insn (insn, tail);
32157 return group_count;
32160 /* We're beginning a new block. Initialize data structures as necessary. */
32163 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32164 int sched_verbose ATTRIBUTE_UNUSED,
32165 int max_ready ATTRIBUTE_UNUSED)
32167 last_scheduled_insn = NULL;
32168 load_store_pendulum = 0;
32173 /* The following function is called at the end of scheduling BB.
32174 After reload, it inserts nops at insn group bundling. */
32177 rs6000_sched_finish (FILE *dump, int sched_verbose)
32182 fprintf (dump, "=== Finishing schedule.\n");
32184 if (reload_completed && rs6000_sched_groups)
32186 /* Do not run sched_finish hook when selective scheduling enabled. */
32187 if (sel_sched_p ())
32190 if (rs6000_sched_insert_nops == sched_finish_none)
32193 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32194 n_groups = pad_groups (dump, sched_verbose,
32195 current_sched_info->prev_head,
32196 current_sched_info->next_tail);
32198 n_groups = redefine_groups (dump, sched_verbose,
32199 current_sched_info->prev_head,
32200 current_sched_info->next_tail);
32202 if (sched_verbose >= 6)
32204 fprintf (dump, "ngroups = %d\n", n_groups);
32205 print_rtl (dump, current_sched_info->prev_head);
32206 fprintf (dump, "Done finish_sched\n");
32211 struct rs6000_sched_context
32213 short cached_can_issue_more;
32214 rtx_insn *last_scheduled_insn;
32215 int load_store_pendulum;
32220 typedef struct rs6000_sched_context rs6000_sched_context_def;
32221 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32223 /* Allocate store for new scheduling context. */
32225 rs6000_alloc_sched_context (void)
32227 return xmalloc (sizeof (rs6000_sched_context_def));
32230 /* If CLEAN_P is true then initializes _SC with clean data,
32231 and from the global context otherwise. */
32233 rs6000_init_sched_context (void *_sc, bool clean_p)
32235 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32239 sc->cached_can_issue_more = 0;
32240 sc->last_scheduled_insn = NULL;
32241 sc->load_store_pendulum = 0;
32242 sc->divide_cnt = 0;
32243 sc->vec_pairing = 0;
32247 sc->cached_can_issue_more = cached_can_issue_more;
32248 sc->last_scheduled_insn = last_scheduled_insn;
32249 sc->load_store_pendulum = load_store_pendulum;
32250 sc->divide_cnt = divide_cnt;
32251 sc->vec_pairing = vec_pairing;
32255 /* Sets the global scheduling context to the one pointed to by _SC. */
32257 rs6000_set_sched_context (void *_sc)
32259 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32261 gcc_assert (sc != NULL);
32263 cached_can_issue_more = sc->cached_can_issue_more;
32264 last_scheduled_insn = sc->last_scheduled_insn;
32265 load_store_pendulum = sc->load_store_pendulum;
32266 divide_cnt = sc->divide_cnt;
32267 vec_pairing = sc->vec_pairing;
32272 rs6000_free_sched_context (void *_sc)
32274 gcc_assert (_sc != NULL);
32280 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32282 switch (get_attr_type (insn))
32297 /* Length in units of the trampoline for entering a nested function. */
32300 rs6000_trampoline_size (void)
32304 switch (DEFAULT_ABI)
32307 gcc_unreachable ();
32310 ret = (TARGET_32BIT) ? 12 : 24;
32314 gcc_assert (!TARGET_32BIT);
32320 ret = (TARGET_32BIT) ? 40 : 48;
32327 /* Emit RTL insns to initialize the variable parts of a trampoline.
32328 FNADDR is an RTX for the address of the function's pure code.
32329 CXT is an RTX for the static chain value for the function. */
32332 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32334 int regsize = (TARGET_32BIT) ? 4 : 8;
32335 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32336 rtx ctx_reg = force_reg (Pmode, cxt);
32337 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32339 switch (DEFAULT_ABI)
32342 gcc_unreachable ();
32344 /* Under AIX, just build the 3 word function descriptor */
32347 rtx fnmem, fn_reg, toc_reg;
32349 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32350 error ("you cannot take the address of a nested function if you use "
32351 "the %qs option", "-mno-pointers-to-nested-functions");
32353 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32354 fn_reg = gen_reg_rtx (Pmode);
32355 toc_reg = gen_reg_rtx (Pmode);
32357 /* Macro to shorten the code expansions below. */
32358 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32360 m_tramp = replace_equiv_address (m_tramp, addr);
32362 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32363 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32364 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32365 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32366 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32372 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32376 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32377 LCT_NORMAL, VOIDmode,
32379 GEN_INT (rs6000_trampoline_size ()), SImode,
32387 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32388 identifier as an argument, so the front end shouldn't look it up. */
32391 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32393 return is_attribute_p ("altivec", attr_id);
32396 /* Handle the "altivec" attribute. The attribute may have
32397 arguments as follows:
32399 __attribute__((altivec(vector__)))
32400 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32401 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32403 and may appear more than once (e.g., 'vector bool char') in a
32404 given declaration. */
32407 rs6000_handle_altivec_attribute (tree *node,
32408 tree name ATTRIBUTE_UNUSED,
32410 int flags ATTRIBUTE_UNUSED,
32411 bool *no_add_attrs)
32413 tree type = *node, result = NULL_TREE;
32417 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32418 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32419 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32422 while (POINTER_TYPE_P (type)
32423 || TREE_CODE (type) == FUNCTION_TYPE
32424 || TREE_CODE (type) == METHOD_TYPE
32425 || TREE_CODE (type) == ARRAY_TYPE)
32426 type = TREE_TYPE (type);
32428 mode = TYPE_MODE (type);
32430 /* Check for invalid AltiVec type qualifiers. */
32431 if (type == long_double_type_node)
32432 error ("use of %<long double%> in AltiVec types is invalid");
32433 else if (type == boolean_type_node)
32434 error ("use of boolean types in AltiVec types is invalid");
32435 else if (TREE_CODE (type) == COMPLEX_TYPE)
32436 error ("use of %<complex%> in AltiVec types is invalid");
32437 else if (DECIMAL_FLOAT_MODE_P (mode))
32438 error ("use of decimal floating point types in AltiVec types is invalid");
32439 else if (!TARGET_VSX)
32441 if (type == long_unsigned_type_node || type == long_integer_type_node)
32444 error ("use of %<long%> in AltiVec types is invalid for "
32445 "64-bit code without %qs", "-mvsx");
32446 else if (rs6000_warn_altivec_long)
32447 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32450 else if (type == long_long_unsigned_type_node
32451 || type == long_long_integer_type_node)
32452 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32454 else if (type == double_type_node)
32455 error ("use of %<double%> in AltiVec types is invalid without %qs",
32459 switch (altivec_type)
32462 unsigned_p = TYPE_UNSIGNED (type);
32466 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32469 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32472 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32475 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32478 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32480 case E_SFmode: result = V4SF_type_node; break;
32481 case E_DFmode: result = V2DF_type_node; break;
32482 /* If the user says 'vector int bool', we may be handed the 'bool'
32483 attribute _before_ the 'vector' attribute, and so select the
32484 proper type in the 'b' case below. */
32485 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32486 case E_V2DImode: case E_V2DFmode:
32494 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32495 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32496 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32497 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32504 case E_V8HImode: result = pixel_V8HI_type_node;
32510 /* Propagate qualifiers attached to the element type
32511 onto the vector type. */
32512 if (result && result != type && TYPE_QUALS (type))
32513 result = build_qualified_type (result, TYPE_QUALS (type));
32515 *no_add_attrs = true; /* No need to hang on to the attribute. */
32518 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32523 /* AltiVec defines five built-in scalar types that serve as vector
32524 elements; we must teach the compiler how to mangle them. The 128-bit
32525 floating point mangling is target-specific as well. */
32527 static const char *
32528 rs6000_mangle_type (const_tree type)
32530 type = TYPE_MAIN_VARIANT (type);
32532 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32533 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32536 if (type == bool_char_type_node) return "U6__boolc";
32537 if (type == bool_short_type_node) return "U6__bools";
32538 if (type == pixel_type_node) return "u7__pixel";
32539 if (type == bool_int_type_node) return "U6__booli";
32540 if (type == bool_long_long_type_node) return "U6__boolx";
32542 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
32544 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
32545 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
32547 /* For all other types, use the default mangling. */
32551 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32552 struct attribute_spec.handler. */
32555 rs6000_handle_longcall_attribute (tree *node, tree name,
32556 tree args ATTRIBUTE_UNUSED,
32557 int flags ATTRIBUTE_UNUSED,
32558 bool *no_add_attrs)
32560 if (TREE_CODE (*node) != FUNCTION_TYPE
32561 && TREE_CODE (*node) != FIELD_DECL
32562 && TREE_CODE (*node) != TYPE_DECL)
32564 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32566 *no_add_attrs = true;
32572 /* Set longcall attributes on all functions declared when
32573 rs6000_default_long_calls is true. */
32575 rs6000_set_default_type_attributes (tree type)
32577 if (rs6000_default_long_calls
32578 && (TREE_CODE (type) == FUNCTION_TYPE
32579 || TREE_CODE (type) == METHOD_TYPE))
32580 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32582 TYPE_ATTRIBUTES (type));
32585 darwin_set_default_type_attributes (type);
32589 /* Return a reference suitable for calling a function with the
32590 longcall attribute. */
32593 rs6000_longcall_ref (rtx call_ref, rtx arg)
32595 /* System V adds '.' to the internal name, so skip them. */
32596 const char *call_name = XSTR (call_ref, 0);
32597 if (*call_name == '.')
32599 while (*call_name == '.')
32602 tree node = get_identifier (call_name);
32603 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32608 rtx base = const0_rtx;
32610 if (DEFAULT_ABI == ABI_ELFv2)
32612 base = gen_rtx_REG (Pmode, TOC_REGISTER);
32618 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32621 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
32622 may be used by a function global entry point. For SysV4, r11
32623 is used by __glink_PLTresolve lazy resolver entry. */
32624 rtx reg = gen_rtx_REG (Pmode, regno);
32625 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
32627 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
32629 emit_insn (gen_rtx_SET (reg, hi));
32630 emit_insn (gen_rtx_SET (reg, lo));
32634 return force_reg (Pmode, call_ref);
32637 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32638 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32641 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32642 struct attribute_spec.handler. */
32644 rs6000_handle_struct_attribute (tree *node, tree name,
32645 tree args ATTRIBUTE_UNUSED,
32646 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32649 if (DECL_P (*node))
32651 if (TREE_CODE (*node) == TYPE_DECL)
32652 type = &TREE_TYPE (*node);
32657 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32658 || TREE_CODE (*type) == UNION_TYPE)))
32660 warning (OPT_Wattributes, "%qE attribute ignored", name);
32661 *no_add_attrs = true;
32664 else if ((is_attribute_p ("ms_struct", name)
32665 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32666 || ((is_attribute_p ("gcc_struct", name)
32667 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32669 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32671 *no_add_attrs = true;
32678 rs6000_ms_bitfield_layout_p (const_tree record_type)
32680 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32681 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32682 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32685 #ifdef USING_ELFOS_H
32687 /* A get_unnamed_section callback, used for switching to toc_section. */
32690 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32692 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32693 && TARGET_MINIMAL_TOC)
32695 if (!toc_initialized)
32697 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32698 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32699 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32700 fprintf (asm_out_file, "\t.tc ");
32701 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32702 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32703 fprintf (asm_out_file, "\n");
32705 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32706 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32707 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32708 fprintf (asm_out_file, " = .+32768\n");
32709 toc_initialized = 1;
32712 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32714 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32716 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32717 if (!toc_initialized)
32719 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32720 toc_initialized = 1;
32725 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32726 if (!toc_initialized)
32728 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32729 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32730 fprintf (asm_out_file, " = .+32768\n");
32731 toc_initialized = 1;
32736 /* Implement TARGET_ASM_INIT_SECTIONS. */
32739 rs6000_elf_asm_init_sections (void)
32742 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32745 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32746 SDATA2_SECTION_ASM_OP);
32749 /* Implement TARGET_SELECT_RTX_SECTION. */
32752 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32753 unsigned HOST_WIDE_INT align)
32755 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32756 return toc_section;
32758 return default_elf_select_rtx_section (mode, x, align);
32761 /* For a SYMBOL_REF, set generic flags and then perform some
32762 target-specific processing.
32764 When the AIX ABI is requested on a non-AIX system, replace the
32765 function name with the real name (with a leading .) rather than the
32766 function descriptor name. This saves a lot of overriding code to
32767 read the prefixes. */
32769 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32771 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32773 default_encode_section_info (decl, rtl, first);
32776 && TREE_CODE (decl) == FUNCTION_DECL
32778 && DEFAULT_ABI == ABI_AIX)
32780 rtx sym_ref = XEXP (rtl, 0);
32781 size_t len = strlen (XSTR (sym_ref, 0));
32782 char *str = XALLOCAVEC (char, len + 2);
32784 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32785 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32790 compare_section_name (const char *section, const char *templ)
32794 len = strlen (templ);
32795 return (strncmp (section, templ, len) == 0
32796 && (section[len] == 0 || section[len] == '.'));
32800 rs6000_elf_in_small_data_p (const_tree decl)
32802 if (rs6000_sdata == SDATA_NONE)
32805 /* We want to merge strings, so we never consider them small data. */
32806 if (TREE_CODE (decl) == STRING_CST)
32809 /* Functions are never in the small data area. */
32810 if (TREE_CODE (decl) == FUNCTION_DECL)
32813 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32815 const char *section = DECL_SECTION_NAME (decl);
32816 if (compare_section_name (section, ".sdata")
32817 || compare_section_name (section, ".sdata2")
32818 || compare_section_name (section, ".gnu.linkonce.s")
32819 || compare_section_name (section, ".sbss")
32820 || compare_section_name (section, ".sbss2")
32821 || compare_section_name (section, ".gnu.linkonce.sb")
32822 || strcmp (section, ".PPC.EMB.sdata0") == 0
32823 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32828 /* If we are told not to put readonly data in sdata, then don't. */
32829 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
32830 && !rs6000_readonly_in_sdata)
32833 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32836 && size <= g_switch_value
32837 /* If it's not public, and we're not going to reference it there,
32838 there's no need to put it in the small data section. */
32839 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32846 #endif /* USING_ELFOS_H */
32848 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32851 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32853 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32856 /* Do not place thread-local symbols refs in the object blocks. */
32859 rs6000_use_blocks_for_decl_p (const_tree decl)
32861 return !DECL_THREAD_LOCAL_P (decl);
32864 /* Return a REG that occurs in ADDR with coefficient 1.
32865 ADDR can be effectively incremented by incrementing REG.
32867 r0 is special and we must not select it as an address
32868 register by this routine since our caller will try to
32869 increment the returned register via an "la" instruction. */
32872 find_addr_reg (rtx addr)
32874 while (GET_CODE (addr) == PLUS)
32876 if (REG_P (XEXP (addr, 0))
32877 && REGNO (XEXP (addr, 0)) != 0)
32878 addr = XEXP (addr, 0);
32879 else if (REG_P (XEXP (addr, 1))
32880 && REGNO (XEXP (addr, 1)) != 0)
32881 addr = XEXP (addr, 1);
32882 else if (CONSTANT_P (XEXP (addr, 0)))
32883 addr = XEXP (addr, 1);
32884 else if (CONSTANT_P (XEXP (addr, 1)))
32885 addr = XEXP (addr, 0);
32887 gcc_unreachable ();
32889 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
32894 rs6000_fatal_bad_address (rtx op)
32896 fatal_insn ("bad address", op);
32901 typedef struct branch_island_d {
32902 tree function_name;
32908 static vec<branch_island, va_gc> *branch_islands;
32910 /* Remember to generate a branch island for far calls to the given
32914 add_compiler_branch_island (tree label_name, tree function_name,
32917 branch_island bi = {function_name, label_name, line_number};
32918 vec_safe_push (branch_islands, bi);
32921 /* Generate far-jump branch islands for everything recorded in
32922 branch_islands. Invoked immediately after the last instruction of
32923 the epilogue has been emitted; the branch islands must be appended
32924 to, and contiguous with, the function body. Mach-O stubs are
32925 generated in machopic_output_stub(). */
32928 macho_branch_islands (void)
32932 while (!vec_safe_is_empty (branch_islands))
32934 branch_island *bi = &branch_islands->last ();
32935 const char *label = IDENTIFIER_POINTER (bi->label_name);
32936 const char *name = IDENTIFIER_POINTER (bi->function_name);
32937 char name_buf[512];
32938 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32939 if (name[0] == '*' || name[0] == '&')
32940 strcpy (name_buf, name+1);
32944 strcpy (name_buf+1, name);
32946 strcpy (tmp_buf, "\n");
32947 strcat (tmp_buf, label);
32948 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32949 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32950 dbxout_stabd (N_SLINE, bi->line_number);
32951 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32954 if (TARGET_LINK_STACK)
32957 get_ppc476_thunk_name (name);
32958 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32959 strcat (tmp_buf, name);
32960 strcat (tmp_buf, "\n");
32961 strcat (tmp_buf, label);
32962 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32966 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32967 strcat (tmp_buf, label);
32968 strcat (tmp_buf, "_pic\n");
32969 strcat (tmp_buf, label);
32970 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32973 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32974 strcat (tmp_buf, name_buf);
32975 strcat (tmp_buf, " - ");
32976 strcat (tmp_buf, label);
32977 strcat (tmp_buf, "_pic)\n");
32979 strcat (tmp_buf, "\tmtlr r0\n");
32981 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32982 strcat (tmp_buf, name_buf);
32983 strcat (tmp_buf, " - ");
32984 strcat (tmp_buf, label);
32985 strcat (tmp_buf, "_pic)\n");
32987 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
32991 strcat (tmp_buf, ":\n\tlis r12,hi16(");
32992 strcat (tmp_buf, name_buf);
32993 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
32994 strcat (tmp_buf, name_buf);
32995 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
32997 output_asm_insn (tmp_buf, 0);
32998 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32999 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33000 dbxout_stabd (N_SLINE, bi->line_number);
33001 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33002 branch_islands->pop ();
33006 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
33007 already there or not. */
33010 no_previous_def (tree function_name)
33015 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33016 if (function_name == bi->function_name)
33021 /* GET_PREV_LABEL gets the label name from the previous definition of
33025 get_prev_label (tree function_name)
33030 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33031 if (function_name == bi->function_name)
33032 return bi->label_name;
33036 /* Generate PIC and indirect symbol stubs. */
/* NOTE(review): interior lines elided in this extract (conditionals,
   braces, else-branches); code kept byte-identical.  Emits a Darwin
   lazy-binding stub for SYMB named STUB into FILE: the PIC variant loads
   the lazy pointer PC-relatively via mflr/bcl, the non-PIC variant via
   lis/ori absolute addressing, then both mtctr/bctr to it; finally emits
   the lazy pointer itself pointing at dyld_stub_binding_helper.  */
33039 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33041 unsigned int length;
33042 char *symbol_name, *lazy_ptr_name;
33043 char *local_label_0;
/* Monotonic counter used to generate unique "L<n>$spb" local labels.  */
33044 static unsigned label = 0;
33046 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33047 symb = (*targetm.strip_name_encoding) (symb);
33050 length = strlen (symb);
33051 symbol_name = XALLOCAVEC (char, length + 32);
33052 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33054 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33055 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
/* Pick the stub section; presumably PIC vs non-PIC selects between these
   two switch_to_section calls — the condition is elided here.  */
33058 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33060 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33064 fprintf (file, "\t.align 5\n");
33066 fprintf (file, "%s:\n", stub);
33067 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33070 local_label_0 = XALLOCAVEC (char, 16);
33071 sprintf (local_label_0, "L%u$spb", label);
33073 fprintf (file, "\tmflr r0\n");
/* With -mlink-stack, call the ppc476 thunk to get the PC instead of
   using the bcl 20,31 trick (which corrupts the link stack).  */
33074 if (TARGET_LINK_STACK)
33077 get_ppc476_thunk_name (name);
33078 fprintf (file, "\tbl %s\n", name);
33079 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33083 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33084 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33086 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33087 lazy_ptr_name, local_label_0);
33088 fprintf (file, "\tmtlr r0\n");
33089 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33090 (TARGET_64BIT ? "ldu" : "lwzu"),
33091 lazy_ptr_name, local_label_0);
33092 fprintf (file, "\tmtctr r12\n");
33093 fprintf (file, "\tbctr\n");
/* Non-PIC stub: absolute lis/lo16 addressing of the lazy pointer.  */
33097 fprintf (file, "\t.align 4\n");
33099 fprintf (file, "%s:\n", stub);
33100 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33102 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33103 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33104 (TARGET_64BIT ? "ldu" : "lwzu"),
33106 fprintf (file, "\tmtctr r12\n");
33107 fprintf (file, "\tbctr\n");
/* Emit the lazy pointer, initially bound to dyld_stub_binding_helper.  */
33110 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33111 fprintf (file, "%s:\n", lazy_ptr_name);
33112 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33113 fprintf (file, "%sdyld_stub_binding_helper\n",
33114 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
33117 /* Legitimize PIC addresses. If the address is already
33118 position-independent, we return ORIG. Newly generated
33119 position-independent addresses go into a reg. This is REG if non
33120 zero, otherwise we allocate register(s) as necessary. */
/* True iff X fits in a signed 16-bit immediate.  */
33122 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
/* NOTE(review): interior lines elided in this extract; code kept
   byte-identical.  Recursively legitimizes CONST (PLUS base offset)
   addresses, folding small constant offsets, forcing large ones to a
   register or the constant pool, and otherwise deferring to the generic
   machopic_legitimize_pic_address.  */
33125 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33130 if (reg == NULL && !reload_completed)
33131 reg = gen_reg_rtx (Pmode);
33133 if (GET_CODE (orig) == CONST)
/* Already a pic-base-relative PLUS: nothing more to do for this form.  */
33137 if (GET_CODE (XEXP (orig, 0)) == PLUS
33138 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33141 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33143 /* Use a different reg for the intermediate value, as
33144 it will be marked UNCHANGING. */
33145 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33146 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33149 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33152 if (CONST_INT_P (offset))
33154 if (SMALL_INT (offset))
33155 return plus_constant (Pmode, base, INTVAL (offset));
33156 else if (!reload_completed)
33157 offset = force_reg (Pmode, offset);
/* Large offset after reload: spill the whole constant to memory.  */
33160 rtx mem = force_const_mem (Pmode, orig);
33161 return machopic_legitimize_pic_address (mem, Pmode, reg);
33164 return gen_rtx_PLUS (Pmode, base, offset);
33167 /* Fall back on generic machopic code. */
33168 return machopic_legitimize_pic_address (orig, mode, reg);
33171 /* Output a .machine directive for the Darwin assembler, and call
33172 the generic start_file routine. */
/* NOTE(review): struct-member and loop-increment lines are elided in
   this extract; code kept byte-identical.  Maps the -mcpu= argument (or
   ISA flag bits) to an assembler .machine name via the table below.  */
33175 rs6000_darwin_file_start (void)
33177 static const struct
33181 HOST_WIDE_INT if_set;
/* Table entries: { -mcpu argument, .machine name, ISA flags that also
   select this entry }.  Terminated by the NULL/"ppc" fallback row.  */
33183 { "ppc64", "ppc64", MASK_64BIT },
33184 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33185 { "power4", "ppc970", 0 },
33186 { "G5", "ppc970", 0 },
33187 { "7450", "ppc7450", 0 },
33188 { "7400", "ppc7400", MASK_ALTIVEC },
33189 { "G4", "ppc7400", 0 },
33190 { "750", "ppc750", 0 },
33191 { "740", "ppc750", 0 },
33192 { "G3", "ppc750", 0 },
33193 { "604e", "ppc604e", 0 },
33194 { "604", "ppc604", 0 },
33195 { "603e", "ppc603", 0 },
33196 { "603", "ppc603", 0 },
33197 { "601", "ppc601", 0 },
33198 { NULL, "ppc", 0 } };
33199 const char *cpu_id = "";
33202 rs6000_file_start ();
33203 darwin_file_start ();
33205 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33207 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33208 cpu_id = rs6000_default_cpu;
/* An explicit -mcpu= on the command line overrides the default.  */
33210 if (global_options_set.x_rs6000_cpu_index)
33211 cpu_id = processor_target_table[rs6000_cpu_index].name;
33213 /* Look through the mapping array. Pick the first name that either
33214 matches the argument, has a bit set in IF_SET that is also set
33215 in the target flags, or has a NULL name. */
33218 while (mapping[i].arg != NULL
33219 && strcmp (mapping[i].arg, cpu_id) != 0
33220 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33223 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
33226 #endif /* TARGET_MACHO */
/* NOTE(review): return type and return values are elided in this
   extract; kept byte-identical.  Selects the relocation read/write mask
   based on ABI — AIX/ELFv2 get a different value than the default.  */
33230 rs6000_elf_reloc_rw_mask (void)
33234 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33240 /* Record an element in the table of global constructors. SYMBOL is
33241 a SYMBOL_REF of the function to be called; PRIORITY is a number
33242 between 0 and MAX_INIT_PRIORITY.
33244 This differs from default_named_section_asm_out_constructor in
33245 that we have special handling for -mrelocatable. */
33247 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
/* NOTE(review): buf declaration and some braces are elided in this
   extract; code kept byte-identical.  */
33249 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33251 const char *section = ".ctors";
33254 if (priority != DEFAULT_INIT_PRIORITY)
33256 sprintf (buf, ".ctors.%.5u",
33257 /* Invert the numbering so the linker puts us in the proper
33258 order; constructors are run from right to left, and the
33259 linker sorts in increasing order. */
33260 MAX_INIT_PRIORITY - priority);
33264 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33265 assemble_align (POINTER_SIZE);
/* -mrelocatable (or -fPIC>1) on SVR4: emit an @fixup reference so the
   entry is relocated at load time, instead of a plain pointer.  */
33267 if (DEFAULT_ABI == ABI_V4
33268 && (TARGET_RELOCATABLE || flag_pic > 1))
33270 fputs ("\t.long (", asm_out_file);
33271 output_addr_const (asm_out_file, symbol);
33272 fputs (")@fixup\n", asm_out_file);
33275 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33278 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
/* Destructor-table counterpart of rs6000_elf_asm_out_constructor: same
   priority-section and @fixup handling, but targets ".dtors".
   NOTE(review): buf declaration and braces elided in this extract.  */
33280 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33282 const char *section = ".dtors";
33285 if (priority != DEFAULT_INIT_PRIORITY)
33287 sprintf (buf, ".dtors.%.5u",
33288 /* Invert the numbering so the linker puts us in the proper
33289 order; constructors are run from right to left, and the
33290 linker sorts in increasing order. */
33291 MAX_INIT_PRIORITY - priority);
33295 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33296 assemble_align (POINTER_SIZE);
33298 if (DEFAULT_ABI == ABI_V4
33299 && (TARGET_RELOCATABLE || flag_pic > 1))
33301 fputs ("\t.long (", asm_out_file);
33302 output_addr_const (asm_out_file, symbol);
33303 fputs (")@fixup\n", asm_out_file);
33306 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
/* Emit the assembler preamble for an ELF function NAME/DECL: the 64-bit
   AIX-style .opd function descriptor, size/type directives, the local
   TOC-pointer word for -mrelocatable V4, the large-code-model TOC
   offset, and (plain AIX) the minimal-TOC function descriptor.
   NOTE(review): several interior lines (braces, declarations, fprintf
   continuations) are elided in this extract; kept byte-identical.  */
33310 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33312 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
/* ELFv1 64-bit: emit the 24-byte function descriptor in .opd.  */
33314 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33315 ASM_OUTPUT_LABEL (file, name);
33316 fputs (DOUBLE_INT_ASM_OP, file);
33317 rs6000_output_function_entry (file, name);
33318 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33321 fputs ("\t.size\t", file);
33322 assemble_name (file, name);
33323 fputs (",24\n\t.type\t.", file);
33324 assemble_name (file, name);
33325 fputs (",@function\n", file);
33326 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33328 fputs ("\t.globl\t.", file);
33329 assemble_name (file, name);
33334 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33335 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33336 rs6000_output_function_entry (file, name);
33337 fputs (":\n", file);
/* -mrelocatable SVR4 without secure PLT: emit a word holding the TOC
   offset next to the function so the prologue can find the TOC.  */
33342 if (DEFAULT_ABI == ABI_V4
33343 && (TARGET_RELOCATABLE || flag_pic > 1)
33344 && !TARGET_SECURE_PLT
33345 && (!constant_pool_empty_p () || crtl->profile)
33346 && (uses_toc = uses_TOC ()))
33351 switch_to_other_text_partition ();
33352 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33354 fprintf (file, "\t.long ");
33355 assemble_name (file, toc_label_name);
33358 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33359 assemble_name (file, buf);
33362 switch_to_other_text_partition ();
33365 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33366 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
/* Large code model: emit the .TOC.-LCF offset for the global entry.  */
33368 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33372 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33374 fprintf (file, "\t.quad .TOC.-");
33375 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33376 assemble_name (file, buf);
/* Plain AIX ABI: emit a 3-word function descriptor in the TOC section
   (entry point, GOT pointer, environment = 0).  */
33380 if (DEFAULT_ABI == ABI_AIX)
33382 const char *desc_name, *orig_name;
33384 orig_name = (*targetm.strip_name_encoding) (name);
33385 desc_name = orig_name;
33386 while (*desc_name == '.')
33389 if (TREE_PUBLIC (decl))
33390 fprintf (file, "\t.globl %s\n", desc_name);
33392 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33393 fprintf (file, "%s:\n", desc_name);
33394 fprintf (file, "\t.long %s\n", orig_name);
33395 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33396 fputs ("\t.long 0\n", file);
33397 fprintf (file, "\t.previous\n");
33399 ASM_OUTPUT_LABEL (file, name);
33402 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
/* End-of-file hook for ELF targets: emits .gnu_attribute records
   describing FP/vector/struct-return ABI usage, the exec-stack and
   split-stack markers, and the CPU-builtin TCB verification symbol.
   NOTE(review): some conditionals, assignments to `fp', and #endif
   lines are elided in this extract; code kept byte-identical.  */
33404 rs6000_elf_file_end (void)
33406 #ifdef HAVE_AS_GNU_ATTRIBUTE
33407 /* ??? The value emitted depends on options active at file end.
33408 Assume anyone using #pragma or attributes that might change
33409 options knows what they are doing. */
33410 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33411 && rs6000_passes_float)
33415 if (TARGET_HARD_FLOAT)
33419 if (rs6000_passes_long_double)
33421 if (!TARGET_LONG_DOUBLE_128)
33423 else if (TARGET_IEEEQUAD)
/* Tag 4 encodes the floating-point ABI in use.  */
33428 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33430 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33432 if (rs6000_passes_vector)
33433 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33434 (TARGET_ALTIVEC_ABI ? 2 : 1));
33435 if (rs6000_returns_struct)
33436 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33437 aix_struct_return ? 2 : 1);
33440 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33441 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33442 file_end_indicate_exec_stack ();
33445 if (flag_split_stack)
33446 file_end_indicate_split_stack ();
33450 /* We have expanded a CPU builtin, so we need to emit a reference to
33451 the special symbol that LIBC uses to declare it supports the
33452 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33453 switch_to_section (data_section);
33454 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33455 fprintf (asm_out_file, "\t%s %s\n",
33456 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33463 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33464 #define HAVE_XCOFF_DWARF_EXTRAS 0
/* NOTE(review): function body (return value) elided in this extract.
   Reports which unwind-info kind the XCOFF target supports.  */
33467 static enum unwind_info_type
33468 rs6000_xcoff_debug_unwind_info (void)
/* Output a section anchor for SYMBOL as ".set name,$ + offset", using
   the current-location syntax the AIX assembler accepts.
   NOTE(review): the buffer declaration line is elided in this extract;
   code kept byte-identical.  */
33474 rs6000_xcoff_asm_output_anchor (rtx symbol)
33478 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33479 SYMBOL_REF_BLOCK_OFFSET (symbol));
33480 fprintf (asm_out_file, "%s", SET_ASM_OP);
33481 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33482 fprintf (asm_out_file, ",");
33483 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33484 fprintf (asm_out_file, "\n");
/* Emit a .globl (GLOBAL_ASM_OP) directive for NAME on STREAM, using the
   XCOFF basename convention.  */
33488 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33490 fputs (GLOBAL_ASM_OP, stream);
33491 RS6000_OUTPUT_BASENAME (stream, name);
33492 putc ('\n', stream);
33495 /* A get_unnamed_decl callback, used for read-only sections. PTR
33496 points to the section string variable. */
/* Emits ".csect <name>[RO],<align>" for the section named by DIRECTIVE
   (a pointer to a char* section-name variable).  */
33499 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33501 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33502 *(const char *const *) directive,
33503 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR)
33506 /* Likewise for read-write sections. */
/* Emits ".csect <name>[RW],<align>" — the read-write counterpart of
   rs6000_xcoff_output_readonly_section_asm_op.  */
33509 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33511 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33512 *(const char *const *) directive,
33513 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR)
/* Emits ".csect <name>[TL],<align>" for thread-local data sections.  */
33517 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33519 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33520 *(const char *const *) directive,
33521 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR)
33524 /* A get_unnamed_section callback, used for switching to toc_section. */
/* NOTE(review): braces and an else line are elided in this extract;
   kept byte-identical.  With -mminimal-toc, defines LCTOC..1 once per
   file and thereafter switches to toc_table[RW]; otherwise just emits
   .toc.  */
33527 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33529 if (TARGET_MINIMAL_TOC)
33531 /* toc_section is always selected at least once from
33532 rs6000_xcoff_file_start, so this is guaranteed to
33533 always be defined once and only once in each file. */
33534 if (!toc_initialized)
33536 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33537 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33538 toc_initialized = 1;
/* 64-bit needs the ",3" (8-byte) alignment suffix on the csect.  */
33540 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33541 (TARGET_32BIT ? "" : ",3"));
33544 fputs ("\t.toc\n", asm_out_file);
33547 /* Implement TARGET_ASM_INIT_SECTIONS. */
/* NOTE(review): the tls_data_section and toc_section assignment lines
   are partially elided in this extract; kept byte-identical.  Creates
   the unnamed XCOFF sections (RO/RW/TLS/TOC) with the csect-emitting
   callbacks defined above.  */
33550 rs6000_xcoff_asm_init_sections (void)
33552 read_only_data_section
33553 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33554 &xcoff_read_only_section_name);
33556 private_data_section
33557 = get_unnamed_section (SECTION_WRITE,
33558 rs6000_xcoff_output_readwrite_section_asm_op,
33559 &xcoff_private_data_section_name);
33561 read_only_private_data_section
33562 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33563 &xcoff_private_rodata_section_name);
33566 = get_unnamed_section (SECTION_TLS,
33567 rs6000_xcoff_output_tls_section_asm_op,
33568 &xcoff_tls_data_section_name);
33570 tls_private_data_section
33571 = get_unnamed_section (SECTION_TLS,
33572 rs6000_xcoff_output_tls_section_asm_op,
33573 &xcoff_private_data_section_name);
33576 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33578 readonly_data_section = read_only_data_section;
/* NOTE(review): return type and body elided in this extract.  XCOFF
   variant of the reloc read/write mask hook.  */
33582 rs6000_xcoff_reloc_rw_mask (void)
/* Switch to a named XCOFF section: .dwsect for debug sections,
   otherwise a .csect whose storage-mapping-class suffix (PR/RO/RW/TL/XO)
   is chosen from the section flags.
   NOTE(review): the smclass assignments and return paths are elided in
   this extract; kept byte-identical.  */
33588 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33589 tree decl ATTRIBUTE_UNUSED)
33592 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33594 if (flags & SECTION_EXCLUDE)
33596 else if (flags & SECTION_DEBUG)
33598 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33601 else if (flags & SECTION_CODE)
33603 else if (flags & SECTION_TLS)
33605 else if (flags & SECTION_WRITE)
/* Code csects get a leading '.' on the name per AIX convention.  */
33610 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33611 (flags & SECTION_CODE) ? "." : "",
33612 name, suffix[smclass], flags & SECTION_ENTSIZE);
/* True iff DECL is a function or variable with an explicit section.  */
33615 #define IN_NAMED_SECTION(DECL) \
33616 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33617 && DECL_SECTION_NAME (DECL) != NULL)
/* Choose the output section for DECL: named section for over-aligned
   objects, read-only (public or private) for constants, TLS sections
   for thread-local variables, else data/private-data by visibility.
   NOTE(review): braces/else lines are elided in this extract; code kept
   byte-identical.  */
33620 rs6000_xcoff_select_section (tree decl, int reloc,
33621 unsigned HOST_WIDE_INT align)
33623 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33625 if (align > BIGGEST_ALIGNMENT)
33627 resolve_unique_section (decl, reloc, true);
33628 if (IN_NAMED_SECTION (decl))
33629 return get_named_section (decl, NULL, reloc);
33632 if (decl_readonly_section (decl, reloc))
33634 if (TREE_PUBLIC (decl))
33635 return read_only_data_section;
33637 return read_only_private_data_section;
33642 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33644 if (TREE_PUBLIC (decl))
33645 return tls_data_section;
33646 else if (bss_initializer_p (decl))
33648 /* Convert to COMMON to emit in BSS. */
33649 DECL_COMMON (decl) = 1;
33650 return tls_comm_section;
33653 return tls_private_data_section;
33657 if (TREE_PUBLIC (decl))
33658 return data_section;
33660 return private_data_section;
/* Assign DECL its own section name (the stripped assembler name) unless
   it is private/common/BSS-eligible, which select_section handles.
   NOTE(review): the early-return line after the condition is elided in
   this extract; code kept byte-identical.  */
33665 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33669 /* Use select_section for private data and uninitialized data with
33670 alignment <= BIGGEST_ALIGNMENT. */
33671 if (!TREE_PUBLIC (decl)
33672 || DECL_COMMON (decl)
33673 || (DECL_INITIAL (decl) == NULL_TREE
33674 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33675 || DECL_INITIAL (decl) == error_mark_node
33676 || (flag_zero_initialized_in_bss
33677 && initializer_zerop (DECL_INITIAL (decl))))
33680 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33681 name = (*targetm.strip_name_encoding) (name);
33682 set_decl_section_name (decl, name);
33685 /* Select section for constant in constant pool.
33687 On RS/6000, all constants are in the private read-only data area.
33688 However, if this is being placed in the TOC it must be output as a
/* TOC-eligible constants go to toc_section; everything else to the
   private read-only data section.  */
33692 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33693 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33695 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33696 return toc_section;
33698 return read_only_private_data_section;
33701 /* Remove any trailing [DS] or the like from the symbol name. */
/* NOTE(review): the len declaration and final return of NAME are elided
   in this extract; kept byte-identical.  Drops a trailing 4-character
   "[XX]" mapping-class suffix when present.  */
33703 static const char *
33704 rs6000_xcoff_strip_name_encoding (const char *name)
33709 len = strlen (name);
33710 if (name[len - 1] == ']')
33711 return ggc_alloc_string (name, len - 4);
33716 /* Section attributes. AIX is always PIC. */
/* Compute section flags, encoding log2 of the section alignment into
   the SECTION_ENTSIZE bits.
   NOTE(review): the else branch between the two align assignments is
   elided in this extract; code kept byte-identical.  */
33718 static unsigned int
33719 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33721 unsigned int align;
33722 unsigned int flags = default_section_type_flags (decl, name, reloc);
33724 /* Align to at least UNIT size. */
33725 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33726 align = MIN_UNITS_PER_WORD;
33728 /* Increase alignment of large objects if not already stricter. */
33729 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33730 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33731 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33733 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33736 /* Output at beginning of assembler file.
33738 Initialize the section names for the RS/6000 at this point.
33740 Specify filename, including full path, to assembler.
33742 We want to go into the TOC section so at least one .toc will be emitted.
33743 Also, in order to output proper .bs/.es pairs, we need at least one static
33744 [RW] section emitted.
33746 Finally, declare mcount when profiling to make the assembler happy. */
/* NOTE(review): the profiling condition guarding the .extern line is
   elided in this extract; code kept byte-identical.  */
33749 rs6000_xcoff_file_start (void)
33751 rs6000_gen_section_name (&xcoff_bss_section_name,
33752 main_input_filename, ".bss_");
33753 rs6000_gen_section_name (&xcoff_private_data_section_name,
33754 main_input_filename, ".rw_");
33755 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
33756 main_input_filename, ".rop_");
33757 rs6000_gen_section_name (&xcoff_read_only_section_name,
33758 main_input_filename, ".ro_");
33759 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33760 main_input_filename, ".tls_");
33761 rs6000_gen_section_name (&xcoff_tbss_section_name,
33762 main_input_filename, ".tbss_[UL]");
33764 fputs ("\t.file\t", asm_out_file);
33765 output_quoted_string (asm_out_file, main_input_filename);
33766 fputc ('\n', asm_out_file);
33767 if (write_symbols != NO_DEBUG)
33768 switch_to_section (private_data_section);
33769 switch_to_section (toc_section);
33770 switch_to_section (text_section);
33772 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33773 rs6000_file_start ();
33776 /* Output at end of assembler file.
33777 On the RS/6000, referencing data should automatically pull in text. */
/* NOTE(review): the final fputs argument line (asm_out_file) is elided
   in this extract; code kept byte-identical.  Emits _section_.text and
   a pointer to it from the data section.  */
33780 rs6000_xcoff_file_end (void)
33782 switch_to_section (text_section);
33783 fputs ("_section_.text:\n", asm_out_file);
33784 switch_to_section (data_section);
33785 fputs (TARGET_32BIT
33786 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
/* Per-call context for rs6000_declare_alias: output file plus whether
   the current pass is emitting the function descriptor ('.'-prefixed).
   NOTE(review): the FILE* member line is elided in this extract.  */
33790 struct declare_alias_data
33793 bool function_descriptor;
33796 /* Declare alias N. A helper function for for_node_and_aliases. */
/* NOTE(review): several braces, return statements, and a dollar-sign
   replacement loop body are elided in this extract; code kept
   byte-identical.  Emits .rename for names containing '$', then
   .globl/.weak (public) or .lglobl (local) plus an alternative label
   for each alias of the main symbol.  */
33799 rs6000_declare_alias (struct symtab_node *n, void *d)
33801 struct declare_alias_data *data = (struct declare_alias_data *)d;
33802 /* Main symbol is output specially, because varasm machinery does part of
33803 the job for us - we do not need to declare .globl/lglobs and such. */
33804 if (!n->alias || n->weakref)
33807 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33810 /* Prevent assemble_alias from trying to use .set pseudo operation
33811 that does not behave as expected by the middle-end. */
33812 TREE_ASM_WRITTEN (n->decl) = true;
33814 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33815 char *buffer = (char *) alloca (strlen (name) + 2);
33817 int dollar_inside = 0;
33819 strcpy (buffer, name);
33820 p = strchr (buffer, '$');
33824 p = strchr (p + 1, '$');
33826 if (TREE_PUBLIC (n->decl))
33828 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
/* '$' in names must be quoted via .rename for the AIX assembler.  */
33830 if (dollar_inside) {
33831 if (data->function_descriptor)
33832 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33833 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33835 if (data->function_descriptor)
33837 fputs ("\t.globl .", data->file);
33838 RS6000_OUTPUT_BASENAME (data->file, buffer);
33839 putc ('\n', data->file);
33841 fputs ("\t.globl ", data->file);
33842 RS6000_OUTPUT_BASENAME (data->file, buffer);
33843 putc ('\n', data->file);
33845 #ifdef ASM_WEAKEN_DECL
33846 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33847 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
/* Non-public aliases: same pattern but with .lglobl.  */
33854 if (data->function_descriptor)
33855 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33856 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33858 if (data->function_descriptor)
33860 fputs ("\t.lglobl .", data->file);
33861 RS6000_OUTPUT_BASENAME (data->file, buffer);
33862 putc ('\n', data->file);
33864 fputs ("\t.lglobl ", data->file);
33865 RS6000_OUTPUT_BASENAME (data->file, buffer);
33866 putc ('\n', data->file);
/* Emit the alias itself as an alternative label ("name:").  */
33868 if (data->function_descriptor)
33869 fputs (".", data->file);
33870 RS6000_OUTPUT_BASENAME (data->file, buffer);
33871 fputs (":\n", data->file);
33876 #ifdef HAVE_GAS_HIDDEN
33877 /* Helper function to calculate visibility of a DECL
33878 and return the value as a const string. */
/* Maps DECL_VISIBILITY to the assembler suffix appended after a symbol
   in .globl/.weak directives ("" for default visibility).  */
33880 static const char *
33881 rs6000_xcoff_visibility (tree decl)
33883 static const char * const visibility_types[] = {
33884 "", ",protected", ",hidden", ",internal"
33887 enum symbol_visibility vis = DECL_VISIBILITY (decl);
33888 return visibility_types[vis];
33893 /* This macro produces the initial definition of a function name.
33894 On the RS/6000, we need to place an extra '.' in the function name and
33895 output the function descriptor.
33896 Dollar signs are converted to underscores.
33898 The csect for the function will have already been created when
33899 text_section was selected. We do have to go back to that csect, however.
33901 The third and fourth parameters to the .function pseudo-op (16 and 044)
33902 are placeholders which no longer have any use.
33904 Because AIX assembler's .set command has unexpected semantics, we output
33905 all aliases as alternative labels in front of the definition. */
/* NOTE(review): several braces, the '$'-scan loop body, and visibility
   #endif lines are elided in this extract; code kept byte-identical.  */
33908 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33910 char *buffer = (char *) alloca (strlen (name) + 1);
33912 int dollar_inside = 0;
33913 struct declare_alias_data data = {file, false};
33915 strcpy (buffer, name);
33916 p = strchr (buffer, '$');
33920 p = strchr (p + 1, '$');
33922 if (TREE_PUBLIC (decl))
33924 if (!RS6000_WEAK || !DECL_WEAK (decl))
33926 if (dollar_inside) {
33927 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33928 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33930 fputs ("\t.globl .", file);
33931 RS6000_OUTPUT_BASENAME (file, buffer);
33932 #ifdef HAVE_GAS_HIDDEN
33933 fputs (rs6000_xcoff_visibility (decl), file);
33940 if (dollar_inside) {
33941 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33942 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33944 fputs ("\t.lglobl .", file);
33945 RS6000_OUTPUT_BASENAME (file, buffer);
/* Function descriptor csect: name[DS] holding entry, TOC, env.  */
33948 fputs ("\t.csect ", file);
33949 RS6000_OUTPUT_BASENAME (file, buffer);
33950 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
33951 RS6000_OUTPUT_BASENAME (file, buffer);
33952 fputs (":\n", file);
/* First alias pass: descriptor-level names (data.function_descriptor
   is still false here).  */
33953 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33955 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
33956 RS6000_OUTPUT_BASENAME (file, buffer);
33957 fputs (", TOC[tc0], 0\n", file);
33959 switch_to_section (function_section (decl));
33961 RS6000_OUTPUT_BASENAME (file, buffer);
33962 fputs (":\n", file);
/* Second alias pass: '.'-prefixed entry-point labels.  */
33963 data.function_descriptor = true;
33964 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33966 if (!DECL_IGNORED_P (decl))
33968 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33969 xcoffout_declare_function (file, decl, buffer);
33970 else if (write_symbols == DWARF2_DEBUG)
33972 name = (*targetm.strip_name_encoding) (name);
33973 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
33980 /* Output assembly language to globalize a symbol from a DECL,
33981 possibly with visibility. */
/* Like rs6000_xcoff_asm_globalize_label but takes the DECL so the GAS
   visibility suffix can be appended.  */
33984 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
33986 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
33987 fputs (GLOBAL_ASM_OP, stream);
33988 RS6000_OUTPUT_BASENAME (stream, name);
33989 #ifdef HAVE_GAS_HIDDEN
33990 fputs (rs6000_xcoff_visibility (decl), stream);
33992 putc ('\n', stream);
33995 /* Output assembly language to define a symbol as COMMON from a DECL,
33996 possibly with visibility. */
/* NOTE(review): the `name' parameter line, the condition guarding the
   align2 assignment, and the fprintf size/align argument lines are
   elided in this extract; code kept byte-identical.  Emits
   ".comm name,size,log2align" with an optional visibility suffix.  */
33999 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34000 tree decl ATTRIBUTE_UNUSED,
34002 unsigned HOST_WIDE_INT size,
34003 unsigned HOST_WIDE_INT align)
/* Default alignment exponent is 2 (4 bytes).  */
34005 unsigned HOST_WIDE_INT align2 = 2;
34008 align2 = floor_log2 (align / BITS_PER_UNIT);
34012 fputs (COMMON_ASM_OP, stream);
34013 RS6000_OUTPUT_BASENAME (stream, name);
34016 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34019 #ifdef HAVE_GAS_HIDDEN
34021 fputs (rs6000_xcoff_visibility (decl), stream);
34023 putc ('\n', stream);
34026 /* This macro produces the initial definition of a object (variable) name.
34027 Because AIX assembler's .set command has unexpected semantics, we output
34028 all aliases as alternative labels in front of the definition. */
/* NOTE(review): the closing call argument line (&data) is elided in
   this extract; kept byte-identical.  */
34031 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34033 struct declare_alias_data data = {file, false};
34034 RS6000_OUTPUT_BASENAME (file, name);
34035 fputs (":\n", file);
34036 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34040 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
/* Emits "<int-op> LABEL-$" — '$' is the AIX assembler's spelling of the
   current location counter.  */
34043 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34045 fputs (integer_asm_op (size, FALSE), file);
34046 assemble_name (file, label);
34047 fputs ("-$", file);
34050 /* Output a symbol offset relative to the dbase for the current object.
34051 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34054 __gcc_unwind_dbase is embedded in all executables/libraries through
34055 libgcc/config/rs6000/crtdbase.S. */
34058 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34060 fputs (integer_asm_op (size, FALSE), file);
34061 assemble_name (file, label);
34062 fputs("-__gcc_unwind_dbase", file);
/* XCOFF TARGET_ENCODE_SECTION_INFO: clears block info on TLS symbols
   and appends the AIX mapping-class suffix ([DS] for functions, [UA]
   for variables) to extern public decl names.
   NOTE(review): flag/symbol declarations and early returns are elided
   in this extract; code kept byte-identical.  */
34067 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34071 const char *symname;
34073 default_encode_section_info (decl, rtl, first);
34075 /* Careful not to prod global register variables. */
34078 symbol = XEXP (rtl, 0);
34079 if (!SYMBOL_REF_P (symbol))
34082 flags = SYMBOL_REF_FLAGS (symbol);
34084 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34085 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34087 SYMBOL_REF_FLAGS (symbol) = flags;
34089 /* Append mapping class to extern decls. */
34090 symname = XSTR (symbol, 0);
34091 if (decl /* sync condition with assemble_external () */
34092 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34093 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34094 || TREE_CODE (decl) == FUNCTION_DECL)
34095 && symname[strlen (symname) - 1] != ']')
/* +5 covers the 4-char "[DS]"/"[UA]" suffix plus the terminator.  */
34097 char *newname = (char *) alloca (strlen (symname) + 5);
34098 strcpy (newname, symname);
34099 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34100 ? "[DS]" : "[UA]"));
34101 XSTR (symbol, 0) = ggc_strdup (newname);
34104 #endif /* HAVE_AS_TLS */
34105 #endif /* TARGET_XCOFF */
/* ASM_WEAKEN_DECL implementation: emits .weak for NAME (and, on AIX
   with dot-symbols, also for the '.'-prefixed entry point with a [DS]
   suffix on the descriptor), then .set lines when VAL aliases NAME.
   NOTE(review): braces and #endif/else lines are elided in this
   extract; code kept byte-identical.  */
34108 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34109 const char *name, const char *val)
34111 fputs ("\t.weak\t", stream);
34112 RS6000_OUTPUT_BASENAME (stream, name);
34113 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34114 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34117 fputs ("[DS]", stream);
34118 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34120 fputs (rs6000_xcoff_visibility (decl), stream);
34122 fputs ("\n\t.weak\t.", stream);
34123 RS6000_OUTPUT_BASENAME (stream, name);
34125 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34127 fputs (rs6000_xcoff_visibility (decl), stream);
34129 fputc ('\n', stream);
/* When VAL is given, also emit the alias definition(s).  */
34132 #ifdef ASM_OUTPUT_DEF
34133 ASM_OUTPUT_DEF (stream, name, val);
34135 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34136 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34138 fputs ("\t.set\t.", stream);
34139 RS6000_OUTPUT_BASENAME (stream, name);
34140 fputs (",.", stream);
34141 RS6000_OUTPUT_BASENAME (stream, val);
34142 fputc ('\n', stream);
34148 /* Return true if INSN should not be copied. */
/* An insn is uncopyable when it is recognized and its machine
   description carries the cannot_copy attribute.  */
34151 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34153 return recog_memoized (insn) >= 0
34154 && get_attr_cannot_copy (insn);
34157 /* Compute a (partial) cost for rtx X. Return true if the complete
34158 cost has been computed, and false if subexpressions should be
34159 scanned. In either case, *TOTAL contains the cost result. */
34162 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34163 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34165 int code = GET_CODE (x);
34169 /* On the RS/6000, if it is valid in the insn, it is free. */
34171 if (((outer_code == SET
34172 || outer_code == PLUS
34173 || outer_code == MINUS)
34174 && (satisfies_constraint_I (x)
34175 || satisfies_constraint_L (x)))
34176 || (outer_code == AND
34177 && (satisfies_constraint_K (x)
34179 ? satisfies_constraint_L (x)
34180 : satisfies_constraint_J (x))))
34181 || ((outer_code == IOR || outer_code == XOR)
34182 && (satisfies_constraint_K (x)
34184 ? satisfies_constraint_L (x)
34185 : satisfies_constraint_J (x))))
34186 || outer_code == ASHIFT
34187 || outer_code == ASHIFTRT
34188 || outer_code == LSHIFTRT
34189 || outer_code == ROTATE
34190 || outer_code == ROTATERT
34191 || outer_code == ZERO_EXTRACT
34192 || (outer_code == MULT
34193 && satisfies_constraint_I (x))
34194 || ((outer_code == DIV || outer_code == UDIV
34195 || outer_code == MOD || outer_code == UMOD)
34196 && exact_log2 (INTVAL (x)) >= 0)
34197 || (outer_code == COMPARE
34198 && (satisfies_constraint_I (x)
34199 || satisfies_constraint_K (x)))
34200 || ((outer_code == EQ || outer_code == NE)
34201 && (satisfies_constraint_I (x)
34202 || satisfies_constraint_K (x)
34204 ? satisfies_constraint_L (x)
34205 : satisfies_constraint_J (x))))
34206 || (outer_code == GTU
34207 && satisfies_constraint_I (x))
34208 || (outer_code == LTU
34209 && satisfies_constraint_P (x)))
34214 else if ((outer_code == PLUS
34215 && reg_or_add_cint_operand (x, VOIDmode))
34216 || (outer_code == MINUS
34217 && reg_or_sub_cint_operand (x, VOIDmode))
34218 || ((outer_code == SET
34219 || outer_code == IOR
34220 || outer_code == XOR)
34222 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34224 *total = COSTS_N_INSNS (1);
34230 case CONST_WIDE_INT:
34234 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34238 /* When optimizing for size, MEM should be slightly more expensive
34239 than generating address, e.g., (plus (reg) (const)).
34240 L1 cache latency is about two instructions. */
34241 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34242 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
34243 *total += COSTS_N_INSNS (100);
34252 if (FLOAT_MODE_P (mode))
34253 *total = rs6000_cost->fp;
34255 *total = COSTS_N_INSNS (1);
34259 if (CONST_INT_P (XEXP (x, 1))
34260 && satisfies_constraint_I (XEXP (x, 1)))
34262 if (INTVAL (XEXP (x, 1)) >= -256
34263 && INTVAL (XEXP (x, 1)) <= 255)
34264 *total = rs6000_cost->mulsi_const9;
34266 *total = rs6000_cost->mulsi_const;
34268 else if (mode == SFmode)
34269 *total = rs6000_cost->fp;
34270 else if (FLOAT_MODE_P (mode))
34271 *total = rs6000_cost->dmul;
34272 else if (mode == DImode)
34273 *total = rs6000_cost->muldi;
34275 *total = rs6000_cost->mulsi;
34279 if (mode == SFmode)
34280 *total = rs6000_cost->fp;
34282 *total = rs6000_cost->dmul;
34287 if (FLOAT_MODE_P (mode))
34289 *total = mode == DFmode ? rs6000_cost->ddiv
34290 : rs6000_cost->sdiv;
34297 if (CONST_INT_P (XEXP (x, 1))
34298 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34300 if (code == DIV || code == MOD)
34302 *total = COSTS_N_INSNS (2);
34305 *total = COSTS_N_INSNS (1);
34309 if (GET_MODE (XEXP (x, 1)) == DImode)
34310 *total = rs6000_cost->divdi;
34312 *total = rs6000_cost->divsi;
34314 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34315 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34316 *total += COSTS_N_INSNS (2);
34320 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34324 *total = COSTS_N_INSNS (4);
34328 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34332 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34336 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34339 *total = COSTS_N_INSNS (1);
34343 if (CONST_INT_P (XEXP (x, 1)))
34345 rtx left = XEXP (x, 0);
34346 rtx_code left_code = GET_CODE (left);
34348 /* rotate-and-mask: 1 insn. */
34349 if ((left_code == ROTATE
34350 || left_code == ASHIFT
34351 || left_code == LSHIFTRT)
34352 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34354 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34355 if (!CONST_INT_P (XEXP (left, 1)))
34356 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34357 *total += COSTS_N_INSNS (1);
34361 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34362 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34363 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34364 || (val & 0xffff) == val
34365 || (val & 0xffff0000) == val
34366 || ((val & 0xffff) == 0 && mode == SImode))
34368 *total = rtx_cost (left, mode, AND, 0, speed);
34369 *total += COSTS_N_INSNS (1);
34374 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34376 *total = rtx_cost (left, mode, AND, 0, speed);
34377 *total += COSTS_N_INSNS (2);
34382 *total = COSTS_N_INSNS (1);
34387 *total = COSTS_N_INSNS (1);
34393 *total = COSTS_N_INSNS (1);
34397 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34398 the sign extend and shift separately within the insn. */
34399 if (TARGET_EXTSWSLI && mode == DImode
34400 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34401 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34412 /* Handle mul_highpart. */
34413 if (outer_code == TRUNCATE
34414 && GET_CODE (XEXP (x, 0)) == MULT)
34416 if (mode == DImode)
34417 *total = rs6000_cost->muldi;
34419 *total = rs6000_cost->mulsi;
34422 else if (outer_code == AND)
34425 *total = COSTS_N_INSNS (1);
34430 if (MEM_P (XEXP (x, 0)))
34433 *total = COSTS_N_INSNS (1);
34439 if (!FLOAT_MODE_P (mode))
34441 *total = COSTS_N_INSNS (1);
34447 case UNSIGNED_FLOAT:
34450 case FLOAT_TRUNCATE:
34451 *total = rs6000_cost->fp;
34455 if (mode == DFmode)
34456 *total = rs6000_cost->sfdf_convert;
34458 *total = rs6000_cost->fp;
34462 switch (XINT (x, 1))
34465 *total = rs6000_cost->fp;
34477 *total = COSTS_N_INSNS (1);
34480 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34482 *total = rs6000_cost->fp;
34491 /* Carry bit requires mode == Pmode.
34492 NEG or PLUS already counted so only add one. */
34494 && (outer_code == NEG || outer_code == PLUS))
34496 *total = COSTS_N_INSNS (1);
34504 if (outer_code == SET)
34506 if (XEXP (x, 1) == const0_rtx)
34508 *total = COSTS_N_INSNS (2);
34513 *total = COSTS_N_INSNS (3);
34518 if (outer_code == COMPARE)
34532 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
34535 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34536 int opno, int *total, bool speed)
34538 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34541 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34542 "opno = %d, total = %d, speed = %s, x:\n",
34543 ret ? "complete" : "scan inner",
34544 GET_MODE_NAME (mode),
34545 GET_RTX_NAME (outer_code),
34548 speed ? "true" : "false");
34556 rs6000_insn_cost (rtx_insn *insn, bool speed)
34558 if (recog_memoized (insn) < 0)
34562 return get_attr_length (insn);
34564 int cost = get_attr_cost (insn);
34568 int n = get_attr_length (insn) / 4;
34569 enum attr_type type = get_attr_type (insn);
34576 cost = COSTS_N_INSNS (n + 1);
34580 switch (get_attr_size (insn))
34583 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34586 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34589 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34592 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
34595 gcc_unreachable ();
34599 switch (get_attr_size (insn))
34602 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
34605 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
34608 gcc_unreachable ();
34613 cost = n * rs6000_cost->fp;
34616 cost = n * rs6000_cost->dmul;
34619 cost = n * rs6000_cost->sdiv;
34622 cost = n * rs6000_cost->ddiv;
34629 cost = COSTS_N_INSNS (n + 2);
34633 cost = COSTS_N_INSNS (n);
34639 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34642 rs6000_debug_address_cost (rtx x, machine_mode mode,
34643 addr_space_t as, bool speed)
34645 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34647 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34648 ret, speed ? "true" : "false");
34655 /* A C expression returning the cost of moving data from a register of class
34656 CLASS1 to one of CLASS2. */
34659 rs6000_register_move_cost (machine_mode mode,
34660 reg_class_t from, reg_class_t to)
34663 reg_class_t rclass;
34665 if (TARGET_DEBUG_COST)
34668 /* If we have VSX, we can easily move between FPR or Altivec registers,
34669 otherwise we can only easily move within classes.
34670 Do this first so we give best-case answers for union classes
34671 containing both gprs and vsx regs. */
34672 HARD_REG_SET to_vsx, from_vsx;
34673 COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
34674 AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
34675 COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
34676 AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
34677 if (!hard_reg_set_empty_p (to_vsx)
34678 && !hard_reg_set_empty_p (from_vsx)
34680 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
34682 int reg = FIRST_FPR_REGNO;
34684 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
34685 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
34686 reg = FIRST_ALTIVEC_REGNO;
34687 ret = 2 * hard_regno_nregs (reg, mode);
34690 /* Moves from/to GENERAL_REGS. */
34691 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
34692 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
34694 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34696 if (TARGET_DIRECT_MOVE)
34698 if (rs6000_tune == PROCESSOR_POWER9)
34699 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
34701 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
34702 /* SFmode requires a conversion when moving between gprs
34704 if (mode == SFmode)
34708 ret = (rs6000_memory_move_cost (mode, rclass, false)
34709 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34712 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34714 else if (rclass == CR_REGS)
34717 /* For those processors that have slow LR/CTR moves, make them more
34718 expensive than memory in order to bias spills to memory .*/
34719 else if ((rs6000_tune == PROCESSOR_POWER6
34720 || rs6000_tune == PROCESSOR_POWER7
34721 || rs6000_tune == PROCESSOR_POWER8
34722 || rs6000_tune == PROCESSOR_POWER9)
34723 && reg_class_subset_p (rclass, SPECIAL_REGS))
34724 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
34727 /* A move will cost one instruction per GPR moved. */
34728 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
34731 /* Everything else has to go through GENERAL_REGS. */
34733 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34734 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34736 if (TARGET_DEBUG_COST)
34738 if (dbg_cost_ctrl == 1)
34740 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
34741 ret, GET_MODE_NAME (mode), reg_class_names[from],
34742 reg_class_names[to]);
34749 /* A C expressions returning the cost of moving data of MODE from a register to
34753 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34754 bool in ATTRIBUTE_UNUSED)
34758 if (TARGET_DEBUG_COST)
34761 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34762 ret = 4 * hard_regno_nregs (0, mode);
34763 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34764 || reg_classes_intersect_p (rclass, VSX_REGS)))
34765 ret = 4 * hard_regno_nregs (32, mode);
34766 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34767 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
34769 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34771 if (TARGET_DEBUG_COST)
34773 if (dbg_cost_ctrl == 1)
34775 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34776 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34783 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
34785 The register allocator chooses GEN_OR_VSX_REGS for the allocno
34786 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
34787 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
34788 move cost between GENERAL_REGS and VSX_REGS low.
34790 It might seem reasonable to use a union class. After all, if usage
34791 of vsr is low and gpr high, it might make sense to spill gpr to vsr
34792 rather than memory. However, in cases where register pressure of
34793 both is high, like the cactus_adm spec test, allowing
34794 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
34795 the first scheduling pass. This is partly due to an allocno of
34796 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
34797 class, which gives too high a pressure for GENERAL_REGS and too low
34798 for VSX_REGS. So, force a choice of the subclass here.
34800 The best class is also the union if GENERAL_REGS and VSX_REGS have
34801 the same cost. In that case we do use GEN_OR_VSX_REGS as the
34802 allocno class, since trying to narrow down the class by regno mode
34803 is prone to error. For example, SImode is allowed in VSX regs and
34804 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
34805 it would be wrong to choose an allocno of GENERAL_REGS based on
34809 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
34810 reg_class_t allocno_class,
34811 reg_class_t best_class)
34813 switch (allocno_class)
34815 case GEN_OR_VSX_REGS:
34816 /* best_class must be a subset of allocno_class. */
34817 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
34818 || best_class == GEN_OR_FLOAT_REGS
34819 || best_class == VSX_REGS
34820 || best_class == ALTIVEC_REGS
34821 || best_class == FLOAT_REGS
34822 || best_class == GENERAL_REGS
34823 || best_class == BASE_REGS);
34824 /* Use best_class but choose wider classes when copying from the
34825 wider class to best_class is cheap. This mimics IRA choice
34826 of allocno class. */
34827 if (best_class == BASE_REGS)
34828 return GENERAL_REGS;
34830 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
34838 return allocno_class;
34841 /* Returns a code for a target-specific builtin that implements
34842 reciprocal of the function, or NULL_TREE if not available. */
34845 rs6000_builtin_reciprocal (tree fndecl)
34847 switch (DECL_FUNCTION_CODE (fndecl))
34849 case VSX_BUILTIN_XVSQRTDP:
34850 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34853 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34855 case VSX_BUILTIN_XVSQRTSP:
34856 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34859 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34866 /* Load up a constant. If the mode is a vector mode, splat the value across
34867 all of the vector elements. */
34870 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34874 if (mode == SFmode || mode == DFmode)
34876 rtx d = const_double_from_real_value (dconst, mode);
34877 reg = force_reg (mode, d);
34879 else if (mode == V4SFmode)
34881 rtx d = const_double_from_real_value (dconst, SFmode);
34882 rtvec v = gen_rtvec (4, d, d, d, d);
34883 reg = gen_reg_rtx (mode);
34884 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34886 else if (mode == V2DFmode)
34888 rtx d = const_double_from_real_value (dconst, DFmode);
34889 rtvec v = gen_rtvec (2, d, d);
34890 reg = gen_reg_rtx (mode);
34891 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34894 gcc_unreachable ();
34899 /* Generate an FMA instruction. */
34902 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34904 machine_mode mode = GET_MODE (target);
34907 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34908 gcc_assert (dst != NULL);
34911 emit_move_insn (target, dst);
34914 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34917 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34919 machine_mode mode = GET_MODE (dst);
34922 /* This is a tad more complicated, since the fnma_optab is for
34923 a different expression: fma(-m1, m2, a), which is the same
34924 thing except in the case of signed zeros.
34926 Fortunately we know that if FMA is supported that FNMSUB is
34927 also supported in the ISA. Just expand it directly. */
34929 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34931 r = gen_rtx_NEG (mode, a);
34932 r = gen_rtx_FMA (mode, m1, m2, r);
34933 r = gen_rtx_NEG (mode, r);
34934 emit_insn (gen_rtx_SET (dst, r));
34937 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34938 add a reg_note saying that this was a division. Support both scalar and
34939 vector divide. Assumes no trapping math and finite arguments. */
34942 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34944 machine_mode mode = GET_MODE (dst);
34945 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34948 /* Low precision estimates guarantee 5 bits of accuracy. High
34949 precision estimates guarantee 14 bits of accuracy. SFmode
34950 requires 23 bits of accuracy. DFmode requires 52 bits of
34951 accuracy. Each pass at least doubles the accuracy, leading
34952 to the following. */
34953 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34954 if (mode == DFmode || mode == V2DFmode)
34957 enum insn_code code = optab_handler (smul_optab, mode);
34958 insn_gen_fn gen_mul = GEN_FCN (code);
34960 gcc_assert (code != CODE_FOR_nothing);
34962 one = rs6000_load_constant_and_splat (mode, dconst1);
34964 /* x0 = 1./d estimate */
34965 x0 = gen_reg_rtx (mode);
34966 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34969 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34972 /* e0 = 1. - d * x0 */
34973 e0 = gen_reg_rtx (mode);
34974 rs6000_emit_nmsub (e0, d, x0, one);
34976 /* x1 = x0 + e0 * x0 */
34977 x1 = gen_reg_rtx (mode);
34978 rs6000_emit_madd (x1, e0, x0, x0);
34980 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34981 ++i, xprev = xnext, eprev = enext) {
34983 /* enext = eprev * eprev */
34984 enext = gen_reg_rtx (mode);
34985 emit_insn (gen_mul (enext, eprev, eprev));
34987 /* xnext = xprev + enext * xprev */
34988 xnext = gen_reg_rtx (mode);
34989 rs6000_emit_madd (xnext, enext, xprev, xprev);
34995 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34997 /* u = n * xprev */
34998 u = gen_reg_rtx (mode);
34999 emit_insn (gen_mul (u, n, xprev));
35001 /* v = n - (d * u) */
35002 v = gen_reg_rtx (mode);
35003 rs6000_emit_nmsub (v, d, u, n);
35005 /* dst = (v * xprev) + u */
35006 rs6000_emit_madd (dst, v, xprev, u);
35009 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
35012 /* Goldschmidt's Algorithm for single/double-precision floating point
35013 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35016 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35018 machine_mode mode = GET_MODE (src);
35019 rtx e = gen_reg_rtx (mode);
35020 rtx g = gen_reg_rtx (mode);
35021 rtx h = gen_reg_rtx (mode);
35023 /* Low precision estimates guarantee 5 bits of accuracy. High
35024 precision estimates guarantee 14 bits of accuracy. SFmode
35025 requires 23 bits of accuracy. DFmode requires 52 bits of
35026 accuracy. Each pass at least doubles the accuracy, leading
35027 to the following. */
35028 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35029 if (mode == DFmode || mode == V2DFmode)
35034 enum insn_code code = optab_handler (smul_optab, mode);
35035 insn_gen_fn gen_mul = GEN_FCN (code);
35037 gcc_assert (code != CODE_FOR_nothing);
35039 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35041 /* e = rsqrt estimate */
35042 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35045 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35048 rtx zero = force_reg (mode, CONST0_RTX (mode));
35050 if (mode == SFmode)
35052 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35055 emit_move_insn (e, target);
35059 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35060 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35064 /* g = sqrt estimate. */
35065 emit_insn (gen_mul (g, e, src));
35066 /* h = 1/(2*sqrt) estimate. */
35067 emit_insn (gen_mul (h, e, mhalf));
35073 rtx t = gen_reg_rtx (mode);
35074 rs6000_emit_nmsub (t, g, h, mhalf);
35075 /* Apply correction directly to 1/rsqrt estimate. */
35076 rs6000_emit_madd (dst, e, t, e);
35080 for (i = 0; i < passes; i++)
35082 rtx t1 = gen_reg_rtx (mode);
35083 rtx g1 = gen_reg_rtx (mode);
35084 rtx h1 = gen_reg_rtx (mode);
35086 rs6000_emit_nmsub (t1, g, h, mhalf);
35087 rs6000_emit_madd (g1, g, t1, g);
35088 rs6000_emit_madd (h1, h, t1, h);
35093 /* Multiply by 2 for 1/rsqrt. */
35094 emit_insn (gen_add3_insn (dst, h, h));
35099 rtx t = gen_reg_rtx (mode);
35100 rs6000_emit_nmsub (t, g, h, mhalf);
35101 rs6000_emit_madd (dst, g, t, g);
35107 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35108 (Power7) targets. DST is the target, and SRC is the argument operand. */
35111 rs6000_emit_popcount (rtx dst, rtx src)
35113 machine_mode mode = GET_MODE (dst);
35116 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35117 if (TARGET_POPCNTD)
35119 if (mode == SImode)
35120 emit_insn (gen_popcntdsi2 (dst, src));
35122 emit_insn (gen_popcntddi2 (dst, src));
35126 tmp1 = gen_reg_rtx (mode);
35128 if (mode == SImode)
35130 emit_insn (gen_popcntbsi2 (tmp1, src));
35131 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35133 tmp2 = force_reg (SImode, tmp2);
35134 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35138 emit_insn (gen_popcntbdi2 (tmp1, src));
35139 tmp2 = expand_mult (DImode, tmp1,
35140 GEN_INT ((HOST_WIDE_INT)
35141 0x01010101 << 32 | 0x01010101),
35143 tmp2 = force_reg (DImode, tmp2);
35144 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
35149 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35150 target, and SRC is the argument operand. */
35153 rs6000_emit_parity (rtx dst, rtx src)
35155 machine_mode mode = GET_MODE (dst);
35158 tmp = gen_reg_rtx (mode);
35160 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35163 if (mode == SImode)
35165 emit_insn (gen_popcntbsi2 (tmp, src));
35166 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35170 emit_insn (gen_popcntbdi2 (tmp, src));
35171 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35176 if (mode == SImode)
35178 /* Is mult+shift >= shift+xor+shift+xor? */
35179 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35181 rtx tmp1, tmp2, tmp3, tmp4;
35183 tmp1 = gen_reg_rtx (SImode);
35184 emit_insn (gen_popcntbsi2 (tmp1, src));
35186 tmp2 = gen_reg_rtx (SImode);
35187 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35188 tmp3 = gen_reg_rtx (SImode);
35189 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35191 tmp4 = gen_reg_rtx (SImode);
35192 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35193 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35196 rs6000_emit_popcount (tmp, src);
35197 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35201 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35202 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35204 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35206 tmp1 = gen_reg_rtx (DImode);
35207 emit_insn (gen_popcntbdi2 (tmp1, src));
35209 tmp2 = gen_reg_rtx (DImode);
35210 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35211 tmp3 = gen_reg_rtx (DImode);
35212 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35214 tmp4 = gen_reg_rtx (DImode);
35215 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35216 tmp5 = gen_reg_rtx (DImode);
35217 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35219 tmp6 = gen_reg_rtx (DImode);
35220 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35221 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35224 rs6000_emit_popcount (tmp, src);
35225 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
35229 /* Expand an Altivec constant permutation for little endian mode.
35230 OP0 and OP1 are the input vectors and TARGET is the output vector.
35231 SEL specifies the constant permutation vector.
35233 There are two issues: First, the two input operands must be
35234 swapped so that together they form a double-wide array in LE
35235 order. Second, the vperm instruction has surprising behavior
35236 in LE mode: it interprets the elements of the source vectors
35237 in BE mode ("left to right") and interprets the elements of
35238 the destination vector in LE mode ("right to left"). To
35239 correct for this, we must subtract each element of the permute
35240 control vector from 31.
35242 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35243 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35244 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35245 serve as the permute control vector. Then, in BE mode,
35249 places the desired result in vr9. However, in LE mode the
35250 vector contents will be
35252 vr10 = 00000003 00000002 00000001 00000000
35253 vr11 = 00000007 00000006 00000005 00000004
35255 The result of the vperm using the same permute control vector is
35257 vr9 = 05000000 07000000 01000000 03000000
35259 That is, the leftmost 4 bytes of vr10 are interpreted as the
35260 source for the rightmost 4 bytes of vr9, and so on.
35262 If we change the permute control vector to
35264 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35272 vr9 = 00000006 00000004 00000002 00000000. */
35275 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
35276 const vec_perm_indices &sel)
35280 rtx constv, unspec;
35282 /* Unpack and adjust the constant selector. */
35283 for (i = 0; i < 16; ++i)
35285 unsigned int elt = 31 - (sel[i] & 31);
35286 perm[i] = GEN_INT (elt);
35289 /* Expand to a permute, swapping the inputs and using the
35290 adjusted selector. */
35292 op0 = force_reg (V16QImode, op0);
35294 op1 = force_reg (V16QImode, op1);
35296 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35297 constv = force_reg (V16QImode, constv);
35298 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35300 if (!REG_P (target))
35302 rtx tmp = gen_reg_rtx (V16QImode);
35303 emit_move_insn (tmp, unspec);
35307 emit_move_insn (target, unspec);
35310 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35311 permute control vector. But here it's not a constant, so we must
35312 generate a vector NAND or NOR to do the adjustment. */
35315 altivec_expand_vec_perm_le (rtx operands[4])
35317 rtx notx, iorx, unspec;
35318 rtx target = operands[0];
35319 rtx op0 = operands[1];
35320 rtx op1 = operands[2];
35321 rtx sel = operands[3];
35323 rtx norreg = gen_reg_rtx (V16QImode);
35324 machine_mode mode = GET_MODE (target);
35326 /* Get everything in regs so the pattern matches. */
35328 op0 = force_reg (mode, op0);
35330 op1 = force_reg (mode, op1);
35332 sel = force_reg (V16QImode, sel);
35333 if (!REG_P (target))
35334 tmp = gen_reg_rtx (mode);
35336 if (TARGET_P9_VECTOR)
35338 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
35343 /* Invert the selector with a VNAND if available, else a VNOR.
35344 The VNAND is preferred for future fusion opportunities. */
35345 notx = gen_rtx_NOT (V16QImode, sel);
35346 iorx = (TARGET_P8_VECTOR
35347 ? gen_rtx_IOR (V16QImode, notx, notx)
35348 : gen_rtx_AND (V16QImode, notx, notx));
35349 emit_insn (gen_rtx_SET (norreg, iorx));
35351 /* Permute with operands reversed and adjusted selector. */
35352 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35356 /* Copy into target, possibly by way of a register. */
35357 if (!REG_P (target))
35359 emit_move_insn (tmp, unspec);
35363 emit_move_insn (target, unspec);
35366 /* Expand an Altivec constant permutation. Return true if we match
35367 an efficient implementation; false to fall back to VPERM.
35369 OP0 and OP1 are the input vectors and TARGET is the output vector.
35370 SEL specifies the constant permutation vector. */
35373 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
35374 const vec_perm_indices &sel)
35376 struct altivec_perm_insn {
35377 HOST_WIDE_INT mask;
35378 enum insn_code impl;
35379 unsigned char perm[16];
35381 static const struct altivec_perm_insn patterns[] = {
35382 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35383 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35384 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35385 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35386 { OPTION_MASK_ALTIVEC,
35387 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35388 : CODE_FOR_altivec_vmrglb_direct),
35389 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35390 { OPTION_MASK_ALTIVEC,
35391 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35392 : CODE_FOR_altivec_vmrglh_direct),
35393 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35394 { OPTION_MASK_ALTIVEC,
35395 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35396 : CODE_FOR_altivec_vmrglw_direct),
35397 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35398 { OPTION_MASK_ALTIVEC,
35399 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35400 : CODE_FOR_altivec_vmrghb_direct),
35401 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35402 { OPTION_MASK_ALTIVEC,
35403 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35404 : CODE_FOR_altivec_vmrghh_direct),
35405 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35406 { OPTION_MASK_ALTIVEC,
35407 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35408 : CODE_FOR_altivec_vmrghw_direct),
35409 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35410 { OPTION_MASK_P8_VECTOR,
35411 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
35412 : CODE_FOR_p8_vmrgow_v4sf_direct),
35413 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35414 { OPTION_MASK_P8_VECTOR,
35415 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
35416 : CODE_FOR_p8_vmrgew_v4sf_direct),
35417 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35420 unsigned int i, j, elt, which;
35421 unsigned char perm[16];
35425 /* Unpack the constant selector. */
35426 for (i = which = 0; i < 16; ++i)
35429 which |= (elt < 16 ? 1 : 2);
35433 /* Simplify the constant selector based on operands. */
35437 gcc_unreachable ();
35441 if (!rtx_equal_p (op0, op1))
35446 for (i = 0; i < 16; ++i)
35458 /* Look for splat patterns. */
35463 for (i = 0; i < 16; ++i)
35464 if (perm[i] != elt)
35468 if (!BYTES_BIG_ENDIAN)
35470 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35476 for (i = 0; i < 16; i += 2)
35477 if (perm[i] != elt || perm[i + 1] != elt + 1)
35481 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35482 x = gen_reg_rtx (V8HImode);
35483 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35485 emit_move_insn (target, gen_lowpart (V16QImode, x));
35492 for (i = 0; i < 16; i += 4)
35494 || perm[i + 1] != elt + 1
35495 || perm[i + 2] != elt + 2
35496 || perm[i + 3] != elt + 3)
35500 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35501 x = gen_reg_rtx (V4SImode);
35502 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35504 emit_move_insn (target, gen_lowpart (V16QImode, x));
35510 /* Look for merge and pack patterns. */
35511 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35515 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35518 elt = patterns[j].perm[0];
35519 if (perm[0] == elt)
35521 else if (perm[0] == elt + 16)
35525 for (i = 1; i < 16; ++i)
35527 elt = patterns[j].perm[i];
35529 elt = (elt >= 16 ? elt - 16 : elt + 16);
35530 else if (one_vec && elt >= 16)
35532 if (perm[i] != elt)
35537 enum insn_code icode = patterns[j].impl;
35538 machine_mode omode = insn_data[icode].operand[0].mode;
35539 machine_mode imode = insn_data[icode].operand[1].mode;
35541 /* For little-endian, don't use vpkuwum and vpkuhum if the
35542 underlying vector type is not V4SI and V8HI, respectively.
35543 For example, using vpkuwum with a V8HI picks up the even
35544 halfwords (BE numbering) when the even halfwords (LE
35545 numbering) are what we need. */
35546 if (!BYTES_BIG_ENDIAN
35547 && icode == CODE_FOR_altivec_vpkuwum_direct
35549 && GET_MODE (op0) != V4SImode)
35551 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35553 if (!BYTES_BIG_ENDIAN
35554 && icode == CODE_FOR_altivec_vpkuhum_direct
35556 && GET_MODE (op0) != V8HImode)
35558 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35561 /* For little-endian, the two input operands must be swapped
35562 (or swapped back) to ensure proper right-to-left numbering
35564 if (swapped ^ !BYTES_BIG_ENDIAN)
35565 std::swap (op0, op1);
35566 if (imode != V16QImode)
35568 op0 = gen_lowpart (imode, op0);
35569 op1 = gen_lowpart (imode, op1);
35571 if (omode == V16QImode)
35574 x = gen_reg_rtx (omode);
35575 emit_insn (GEN_FCN (icode) (x, op0, op1));
35576 if (omode != V16QImode)
35577 emit_move_insn (target, gen_lowpart (V16QImode, x));
35582 if (!BYTES_BIG_ENDIAN)
35584 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
35591 /* Expand a VSX Permute Doubleword constant permutation.
35592 Return true if we match an efficient implementation. */
35595 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35596 unsigned char perm0, unsigned char perm1)
35600 /* If both selectors come from the same operand, fold to single op. */
35601 if ((perm0 & 2) == (perm1 & 2))
35608 /* If both operands are equal, fold to simpler permutation. */
35609 if (rtx_equal_p (op0, op1))
35612 perm1 = (perm1 & 1) + 2;
35614 /* If the first selector comes from the second operand, swap. */
35615 else if (perm0 & 2)
35621 std::swap (op0, op1);
35623 /* If the second selector does not come from the second operand, fail. */
35624 else if ((perm1 & 2) == 0)
35628 if (target != NULL)
35630 machine_mode vmode, dmode;
35633 vmode = GET_MODE (target);
35634 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35635 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35636 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35637 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35638 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35639 emit_insn (gen_rtx_SET (target, x));
35644 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
35647 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
35648 rtx op1, const vec_perm_indices &sel)
35650 bool testing_p = !target;
35652 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35653 if (TARGET_ALTIVEC && testing_p)
35656 /* Check for ps_merge* or xxpermdi insns. */
35657 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
35661 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35662 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35664 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
35668 if (TARGET_ALTIVEC)
35670 /* Force the target-independent code to lower to V16QImode. */
35671 if (vmode != V16QImode)
35673 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
35680 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
35681 OP0 and OP1 are the input vectors and TARGET is the output vector.
35682 PERM specifies the constant permutation vector. */
35685 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35686 machine_mode vmode, const vec_perm_builder &perm)
35688 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
35690 emit_move_insn (target, x);
35693 /* Expand an extract even operation. */
35696 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35698 machine_mode vmode = GET_MODE (target);
35699 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35700 vec_perm_builder perm (nelt, nelt, 1);
35702 for (i = 0; i < nelt; i++)
35703 perm.quick_push (i * 2);
35705 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
35708 /* Expand a vector interleave operation. */
35711 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35713 machine_mode vmode = GET_MODE (target);
35714 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35715 vec_perm_builder perm (nelt, nelt, 1);
35717 high = (highp ? 0 : nelt / 2);
35718 for (i = 0; i < nelt / 2; i++)
35720 perm.quick_push (i + high);
35721 perm.quick_push (i + nelt + high);
35724 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
35727 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35729 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35731 HOST_WIDE_INT hwi_scale (scale);
35732 REAL_VALUE_TYPE r_pow;
35733 rtvec v = rtvec_alloc (2);
35735 rtx scale_vec = gen_reg_rtx (V2DFmode);
35736 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35737 elt = const_double_from_real_value (r_pow, DFmode);
35738 RTVEC_ELT (v, 0) = elt;
35739 RTVEC_ELT (v, 1) = elt;
35740 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35741 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
35744 /* Return an RTX representing where to find the function value of a
35745 function returning MODE. */
35747 rs6000_complex_function_value (machine_mode mode)
35749 unsigned int regno;
35751 machine_mode inner = GET_MODE_INNER (mode);
35752 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35754 if (TARGET_FLOAT128_TYPE
35756 || (mode == TCmode && TARGET_IEEEQUAD)))
35757 regno = ALTIVEC_ARG_RETURN;
35759 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35760 regno = FP_ARG_RETURN;
35764 regno = GP_ARG_RETURN;
35766 /* 32-bit is OK since it'll go in r3/r4. */
35767 if (TARGET_32BIT && inner_bytes >= 4)
35768 return gen_rtx_REG (mode, regno);
35771 if (inner_bytes >= 8)
35772 return gen_rtx_REG (mode, regno);
35774 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35776 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35777 GEN_INT (inner_bytes));
35778 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35781 /* Return an rtx describing a return value of MODE as a PARALLEL
35782 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35783 stride REG_STRIDE. */
35786 rs6000_parallel_return (machine_mode mode,
35787 int n_elts, machine_mode elt_mode,
35788 unsigned int regno, unsigned int reg_stride)
35790 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35793 for (i = 0; i < n_elts; i++)
35795 rtx r = gen_rtx_REG (elt_mode, regno);
35796 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35797 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35798 regno += reg_stride;
35804 /* Target hook for TARGET_FUNCTION_VALUE.
35806 An integer value is in r3 and a floating-point value is in fp1,
35807 unless -msoft-float. */
35810 rs6000_function_value (const_tree valtype,
35811 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35812 bool outgoing ATTRIBUTE_UNUSED)
35815 unsigned int regno;
35816 machine_mode elt_mode;
35819 /* Special handling for structs in darwin64. */
35821 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35823 CUMULATIVE_ARGS valcum;
35827 valcum.fregno = FP_ARG_MIN_REG;
35828 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35829 /* Do a trial code generation as if this were going to be passed as
35830 an argument; if any part goes in memory, we return NULL. */
35831 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35834 /* Otherwise fall through to standard ABI rules. */
35837 mode = TYPE_MODE (valtype);
35839 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35840 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35842 int first_reg, n_regs;
35844 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35846 /* _Decimal128 must use even/odd register pairs. */
35847 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35848 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35852 first_reg = ALTIVEC_ARG_RETURN;
35856 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35859 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
35860 if (TARGET_32BIT && TARGET_POWERPC64)
35869 int count = GET_MODE_SIZE (mode) / 4;
35870 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35873 if ((INTEGRAL_TYPE_P (valtype)
35874 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35875 || POINTER_TYPE_P (valtype))
35876 mode = TARGET_32BIT ? SImode : DImode;
35878 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35879 /* _Decimal128 must use an even/odd register pair. */
35880 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35881 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35882 && !FLOAT128_VECTOR_P (mode))
35883 regno = FP_ARG_RETURN;
35884 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35885 && targetm.calls.split_complex_arg)
35886 return rs6000_complex_function_value (mode);
35887 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35888 return register is used in both cases, and we won't see V2DImode/V2DFmode
35889 for pure altivec, combine the two cases. */
35890 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35891 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35892 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35893 regno = ALTIVEC_ARG_RETURN;
35895 regno = GP_ARG_RETURN;
35897 return gen_rtx_REG (mode, regno);
35900 /* Define how to find the value returned by a library function
35901 assuming the value has mode MODE. */
35903 rs6000_libcall_value (machine_mode mode)
35905 unsigned int regno;
35907 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
35908 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35909 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35911 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35912 /* _Decimal128 must use an even/odd register pair. */
35913 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35914 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
35915 regno = FP_ARG_RETURN;
35916 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35917 return register is used in both cases, and we won't see V2DImode/V2DFmode
35918 for pure altivec, combine the two cases. */
35919 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35920 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35921 regno = ALTIVEC_ARG_RETURN;
35922 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35923 return rs6000_complex_function_value (mode);
35925 regno = GP_ARG_RETURN;
35927 return gen_rtx_REG (mode, regno);
35930 /* Compute register pressure classes. We implement the target hook to avoid
35931 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
35932 lead to incorrect estimates of number of available registers and therefor
35933 increased register pressure/spill. */
35935 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
35940 pressure_classes[n++] = GENERAL_REGS;
35942 pressure_classes[n++] = VSX_REGS;
35945 if (TARGET_ALTIVEC)
35946 pressure_classes[n++] = ALTIVEC_REGS;
35947 if (TARGET_HARD_FLOAT)
35948 pressure_classes[n++] = FLOAT_REGS;
35950 pressure_classes[n++] = CR_REGS;
35951 pressure_classes[n++] = SPECIAL_REGS;
35956 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35957 Frame pointer elimination is automatically handled.
35959 For the RS/6000, if frame pointer elimination is being done, we would like
35960 to convert ap into fp, not sp.
35962 We need r30 if -mminimal-toc was specified, and there are constant pool
35966 rs6000_can_eliminate (const int from, const int to)
35968 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35969 ? ! frame_pointer_needed
35970 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35971 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
35972 || constant_pool_empty_p ()
35976 /* Define the offset between two registers, FROM to be eliminated and its
35977 replacement TO, at the start of a routine. */
35979 rs6000_initial_elimination_offset (int from, int to)
35981 rs6000_stack_t *info = rs6000_stack_info ();
35982 HOST_WIDE_INT offset;
35984 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35985 offset = info->push_p ? 0 : -info->total_size;
35986 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35988 offset = info->push_p ? 0 : -info->total_size;
35989 if (FRAME_GROWS_DOWNWARD)
35990 offset += info->fixed_size + info->vars_size + info->parm_size;
35992 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35993 offset = FRAME_GROWS_DOWNWARD
35994 ? info->fixed_size + info->vars_size + info->parm_size
35996 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35997 offset = info->total_size;
35998 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35999 offset = info->push_p ? info->total_size : 0;
36000 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
36003 gcc_unreachable ();
36008 /* Fill in sizes of registers used by unwinder. */
36011 rs6000_init_dwarf_reg_sizes_extra (tree address)
36013 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36016 machine_mode mode = TYPE_MODE (char_type_node);
36017 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36018 rtx mem = gen_rtx_MEM (BLKmode, addr);
36019 rtx value = gen_int_mode (16, mode);
36021 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36022 The unwinder still needs to know the size of Altivec registers. */
36024 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36026 int column = DWARF_REG_TO_UNWIND_COLUMN
36027 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36028 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36030 emit_move_insn (adjust_address (mem, mode, offset), value);
36035 /* Map internal gcc register numbers to debug format register numbers.
36036 FORMAT specifies the type of debug register number to use:
36037 0 -- debug information, except for frame-related sections
36038 1 -- DWARF .debug_frame section
36039 2 -- DWARF .eh_frame section */
36042 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36044 /* On some platforms, we use the standard DWARF register
36045 numbering for .debug_info and .debug_frame. */
36046 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
36048 #ifdef RS6000_USE_DWARF_NUMBERING
36051 if (FP_REGNO_P (regno))
36052 return regno - FIRST_FPR_REGNO + 32;
36053 if (ALTIVEC_REGNO_P (regno))
36054 return regno - FIRST_ALTIVEC_REGNO + 1124;
36055 if (regno == LR_REGNO)
36057 if (regno == CTR_REGNO)
36059 if (regno == CA_REGNO)
36060 return 101; /* XER */
36061 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36062 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36063 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36064 to the DWARF reg for CR. */
36065 if (format == 1 && regno == CR2_REGNO)
36067 if (CR_REGNO_P (regno))
36068 return regno - CR0_REGNO + 86;
36069 if (regno == VRSAVE_REGNO)
36071 if (regno == VSCR_REGNO)
36074 /* These do not make much sense. */
36075 if (regno == FRAME_POINTER_REGNUM)
36077 if (regno == ARG_POINTER_REGNUM)
36082 gcc_unreachable ();
36086 /* We use the GCC 7 (and before) internal number for non-DWARF debug
36087 information, and also for .eh_frame. */
36088 /* Translate the regnos to their numbers in GCC 7 (and before). */
36091 if (FP_REGNO_P (regno))
36092 return regno - FIRST_FPR_REGNO + 32;
36093 if (ALTIVEC_REGNO_P (regno))
36094 return regno - FIRST_ALTIVEC_REGNO + 77;
36095 if (regno == LR_REGNO)
36097 if (regno == CTR_REGNO)
36099 if (regno == CA_REGNO)
36100 return 76; /* XER */
36101 if (CR_REGNO_P (regno))
36102 return regno - CR0_REGNO + 68;
36103 if (regno == VRSAVE_REGNO)
36105 if (regno == VSCR_REGNO)
36108 if (regno == FRAME_POINTER_REGNUM)
36110 if (regno == ARG_POINTER_REGNUM)
36115 gcc_unreachable ();
36118 /* target hook eh_return_filter_mode */
36119 static scalar_int_mode
36120 rs6000_eh_return_filter_mode (void)
36122 return TARGET_32BIT ? SImode : word_mode;
36125 /* Target hook for translate_mode_attribute. */
36126 static machine_mode
36127 rs6000_translate_mode_attribute (machine_mode mode)
36129 if ((FLOAT128_IEEE_P (mode)
36130 && ieee128_float_type_node == long_double_type_node)
36131 || (FLOAT128_IBM_P (mode)
36132 && ibm128_float_type_node == long_double_type_node))
36133 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
36137 /* Target hook for scalar_mode_supported_p. */
36139 rs6000_scalar_mode_supported_p (scalar_mode mode)
36141 /* -m32 does not support TImode. This is the default, from
36142 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36143 same ABI as for -m32. But default_scalar_mode_supported_p allows
36144 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36145 for -mpowerpc64. */
36146 if (TARGET_32BIT && mode == TImode)
36149 if (DECIMAL_FLOAT_MODE_P (mode))
36150 return default_decimal_float_supported_p ();
36151 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36154 return default_scalar_mode_supported_p (mode);
36157 /* Target hook for vector_mode_supported_p. */
36159 rs6000_vector_mode_supported_p (machine_mode mode)
36161 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36162 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36164 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36171 /* Target hook for floatn_mode. */
36172 static opt_scalar_float_mode
36173 rs6000_floatn_mode (int n, bool extended)
36183 if (TARGET_FLOAT128_TYPE)
36184 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36186 return opt_scalar_float_mode ();
36189 return opt_scalar_float_mode ();
36192 /* Those are the only valid _FloatNx types. */
36193 gcc_unreachable ();
36207 if (TARGET_FLOAT128_TYPE)
36208 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36210 return opt_scalar_float_mode ();
36213 return opt_scalar_float_mode ();
36219 /* Target hook for c_mode_for_suffix. */
36220 static machine_mode
36221 rs6000_c_mode_for_suffix (char suffix)
36223 if (TARGET_FLOAT128_TYPE)
36225 if (suffix == 'q' || suffix == 'Q')
36226 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36228 /* At the moment, we are not defining a suffix for IBM extended double.
36229 If/when the default for -mabi=ieeelongdouble is changed, and we want
36230 to support __ibm128 constants in legacy library code, we may need to
36231 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
36232 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36233 __float80 constants. */
36239 /* Target hook for invalid_arg_for_unprototyped_fn. */
36240 static const char *
36241 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36243 return (!rs6000_darwin64_abi
36245 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36246 && (funcdecl == NULL_TREE
36247 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36248 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36249 ? N_("AltiVec argument passed to unprototyped function")
36253 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36254 setup by using __stack_chk_fail_local hidden function instead of
36255 calling __stack_chk_fail directly. Otherwise it is better to call
36256 __stack_chk_fail directly. */
36258 static tree ATTRIBUTE_UNUSED
36259 rs6000_stack_protect_fail (void)
36261 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36262 ? default_hidden_stack_protect_fail ()
36263 : default_external_stack_protect_fail ();
36266 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36269 static unsigned HOST_WIDE_INT
36270 rs6000_asan_shadow_offset (void)
36272 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36276 /* Mask options that we want to support inside of attribute((target)) and
36277 #pragma GCC target operations. Note, we do not include things like
36278 64/32-bit, endianness, hard/soft floating point, etc. that would have
36279 different calling sequences. */
36281 struct rs6000_opt_mask {
36282 const char *name; /* option name */
36283 HOST_WIDE_INT mask; /* mask to set */
36284 bool invert; /* invert sense of mask */
36285 bool valid_target; /* option is a target option */
36288 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36290 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36291 { "cmpb", OPTION_MASK_CMPB, false, true },
36292 { "crypto", OPTION_MASK_CRYPTO, false, true },
36293 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36294 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36295 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36297 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
36298 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
36299 { "fprnd", OPTION_MASK_FPRND, false, true },
36300 { "hard-dfp", OPTION_MASK_DFP, false, true },
36301 { "htm", OPTION_MASK_HTM, false, true },
36302 { "isel", OPTION_MASK_ISEL, false, true },
36303 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36304 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36305 { "modulo", OPTION_MASK_MODULO, false, true },
36306 { "mulhw", OPTION_MASK_MULHW, false, true },
36307 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36308 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36309 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36310 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36311 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36312 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36313 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36314 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36315 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36316 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36317 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36318 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36319 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36320 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36321 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36322 { "string", 0, false, true },
36323 { "update", OPTION_MASK_NO_UPDATE, true , true },
36324 { "vsx", OPTION_MASK_VSX, false, true },
36325 #ifdef OPTION_MASK_64BIT
36327 { "aix64", OPTION_MASK_64BIT, false, false },
36328 { "aix32", OPTION_MASK_64BIT, true, false },
36330 { "64", OPTION_MASK_64BIT, false, false },
36331 { "32", OPTION_MASK_64BIT, true, false },
36334 #ifdef OPTION_MASK_EABI
36335 { "eabi", OPTION_MASK_EABI, false, false },
36337 #ifdef OPTION_MASK_LITTLE_ENDIAN
36338 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36339 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36341 #ifdef OPTION_MASK_RELOCATABLE
36342 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36344 #ifdef OPTION_MASK_STRICT_ALIGN
36345 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36347 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36348 { "string", 0, false, false },
36351 /* Builtin mask mapping for printing the flags. */
36352 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36354 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36355 { "vsx", RS6000_BTM_VSX, false, false },
36356 { "fre", RS6000_BTM_FRE, false, false },
36357 { "fres", RS6000_BTM_FRES, false, false },
36358 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36359 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36360 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36361 { "cell", RS6000_BTM_CELL, false, false },
36362 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36363 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36364 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36365 { "crypto", RS6000_BTM_CRYPTO, false, false },
36366 { "htm", RS6000_BTM_HTM, false, false },
36367 { "hard-dfp", RS6000_BTM_DFP, false, false },
36368 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36369 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36370 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
36371 { "float128", RS6000_BTM_FLOAT128, false, false },
36372 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
36375 /* Option variables that we want to support inside attribute((target)) and
36376 #pragma GCC target operations. */
36378 struct rs6000_opt_var {
36379 const char *name; /* option name */
36380 size_t global_offset; /* offset of the option in global_options. */
36381 size_t target_offset; /* offset of the option in target options. */
36384 static struct rs6000_opt_var const rs6000_opt_vars[] =
36387 offsetof (struct gcc_options, x_TARGET_FRIZ),
36388 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36389 { "avoid-indexed-addresses",
36390 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36391 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36393 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36394 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36395 { "optimize-swaps",
36396 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36397 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36398 { "allow-movmisalign",
36399 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36400 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36402 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36403 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36405 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36406 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36407 { "align-branch-targets",
36408 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36409 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36411 offsetof (struct gcc_options, x_tls_markers),
36412 offsetof (struct cl_target_option, x_tls_markers), },
36414 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36415 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36417 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36418 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36419 { "speculate-indirect-jumps",
36420 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
36421 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
36424 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36425 parsing. Return true if there were no errors. */
36428 rs6000_inner_target_options (tree args, bool attr_p)
36432 if (args == NULL_TREE)
36435 else if (TREE_CODE (args) == STRING_CST)
36437 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36440 while ((q = strtok (p, ",")) != NULL)
36442 bool error_p = false;
36443 bool not_valid_p = false;
36444 const char *cpu_opt = NULL;
36447 if (strncmp (q, "cpu=", 4) == 0)
36449 int cpu_index = rs6000_cpu_name_lookup (q+4);
36450 if (cpu_index >= 0)
36451 rs6000_cpu_index = cpu_index;
36458 else if (strncmp (q, "tune=", 5) == 0)
36460 int tune_index = rs6000_cpu_name_lookup (q+5);
36461 if (tune_index >= 0)
36462 rs6000_tune_index = tune_index;
36472 bool invert = false;
36476 if (strncmp (r, "no-", 3) == 0)
36482 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36483 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36485 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36487 if (!rs6000_opt_masks[i].valid_target)
36488 not_valid_p = true;
36492 rs6000_isa_flags_explicit |= mask;
36494 /* VSX needs altivec, so -mvsx automagically sets
36495 altivec and disables -mavoid-indexed-addresses. */
36498 if (mask == OPTION_MASK_VSX)
36500 mask |= OPTION_MASK_ALTIVEC;
36501 TARGET_AVOID_XFORM = 0;
36505 if (rs6000_opt_masks[i].invert)
36509 rs6000_isa_flags &= ~mask;
36511 rs6000_isa_flags |= mask;
36516 if (error_p && !not_valid_p)
36518 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36519 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36521 size_t j = rs6000_opt_vars[i].global_offset;
36522 *((int *) ((char *)&global_options + j)) = !invert;
36524 not_valid_p = false;
36532 const char *eprefix, *esuffix;
36537 eprefix = "__attribute__((__target__(";
36542 eprefix = "#pragma GCC target ";
36547 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36549 else if (not_valid_p)
36550 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36552 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36557 else if (TREE_CODE (args) == TREE_LIST)
36561 tree value = TREE_VALUE (args);
36564 bool ret2 = rs6000_inner_target_options (value, attr_p);
36568 args = TREE_CHAIN (args);
36570 while (args != NULL_TREE);
36575 error ("attribute %<target%> argument not a string");
36582 /* Print out the target options as a list for -mdebug=target. */
36585 rs6000_debug_target_options (tree args, const char *prefix)
36587 if (args == NULL_TREE)
36588 fprintf (stderr, "%s<NULL>", prefix);
36590 else if (TREE_CODE (args) == STRING_CST)
36592 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36595 while ((q = strtok (p, ",")) != NULL)
36598 fprintf (stderr, "%s\"%s\"", prefix, q);
36603 else if (TREE_CODE (args) == TREE_LIST)
36607 tree value = TREE_VALUE (args);
36610 rs6000_debug_target_options (value, prefix);
36613 args = TREE_CHAIN (args);
36615 while (args != NULL_TREE);
36619 gcc_unreachable ();
36625 /* Hook to validate attribute((target("..."))). */
36628 rs6000_valid_attribute_p (tree fndecl,
36629 tree ARG_UNUSED (name),
36633 struct cl_target_option cur_target;
36636 tree new_target, new_optimize;
36637 tree func_optimize;
36639 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36641 if (TARGET_DEBUG_TARGET)
36643 tree tname = DECL_NAME (fndecl);
36644 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36646 fprintf (stderr, "function: %.*s\n",
36647 (int) IDENTIFIER_LENGTH (tname),
36648 IDENTIFIER_POINTER (tname));
36650 fprintf (stderr, "function: unknown\n");
36652 fprintf (stderr, "args:");
36653 rs6000_debug_target_options (args, " ");
36654 fprintf (stderr, "\n");
36657 fprintf (stderr, "flags: 0x%x\n", flags);
36659 fprintf (stderr, "--------------------\n");
36662 /* attribute((target("default"))) does nothing, beyond
36663 affecting multi-versioning. */
36664 if (TREE_VALUE (args)
36665 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36666 && TREE_CHAIN (args) == NULL_TREE
36667 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36670 old_optimize = build_optimization_node (&global_options);
36671 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36673 /* If the function changed the optimization levels as well as setting target
36674 options, start with the optimizations specified. */
36675 if (func_optimize && func_optimize != old_optimize)
36676 cl_optimization_restore (&global_options,
36677 TREE_OPTIMIZATION (func_optimize));
36679 /* The target attributes may also change some optimization flags, so update
36680 the optimization options if necessary. */
36681 cl_target_option_save (&cur_target, &global_options);
36682 rs6000_cpu_index = rs6000_tune_index = -1;
36683 ret = rs6000_inner_target_options (args, true);
36685 /* Set up any additional state. */
36688 ret = rs6000_option_override_internal (false);
36689 new_target = build_target_option_node (&global_options);
36694 new_optimize = build_optimization_node (&global_options);
36701 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36703 if (old_optimize != new_optimize)
36704 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36707 cl_target_option_restore (&global_options, &cur_target);
36709 if (old_optimize != new_optimize)
36710 cl_optimization_restore (&global_options,
36711 TREE_OPTIMIZATION (old_optimize));
36717 /* Hook to validate the current #pragma GCC target and set the state, and
36718 update the macros based on what was changed. If ARGS is NULL, then
36719 POP_TARGET is used to reset the options. */
/* NOTE(review): this listing is decimated -- the return-type line, braces and
   several statements are missing (original line numbers jump); verify any edit
   against the full rs6000.c.  */
36722 rs6000_pragma_target_parse (tree args, tree pop_target)
/* Snapshot the option state before the pragma so we can diff against it.  */
36724 tree prev_tree = build_target_option_node (&global_options);
36726 struct cl_target_option *prev_opt, *cur_opt;
36727 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36728 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
/* Optional debug dump of the pragma arguments (or the popped target).  */
36730 if (TARGET_DEBUG_TARGET)
36732 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36733 fprintf (stderr, "args:");
36734 rs6000_debug_target_options (args, " ");
36735 fprintf (stderr, "\n");
36739 fprintf (stderr, "pop_target:\n");
36740 debug_tree (pop_target);
36743 fprintf (stderr, "pop_target: <NULL>\n");
36745 fprintf (stderr, "--------------------\n");
/* Start from POP_TARGET when popping, else from the default options.  */
36750 cur_tree = ((pop_target)
36752 : target_option_default_node);
36753 cl_target_option_restore (&global_options,
36754 TREE_TARGET_OPTION (cur_tree));
/* Re-parse the pragma's options on top of the restored baseline; a -1 index
   forces the cpu/tune selection to be recomputed.  */
36758 rs6000_cpu_index = rs6000_tune_index = -1;
36759 if (!rs6000_inner_target_options (args, false)
36760 || !rs6000_option_override_internal (false)
36761 || (cur_tree = build_target_option_node (&global_options))
36764 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36765 fprintf (stderr, "invalid pragma\n");
/* Commit the new option node and switch the backend state to it.  */
36771 target_option_current_node = cur_tree;
36772 rs6000_activate_target_options (target_option_current_node);
36774 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36775 change the macros that are defined. */
36776 if (rs6000_target_modify_macros_ptr)
36778 prev_opt = TREE_TARGET_OPTION (prev_tree);
36779 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36780 prev_flags = prev_opt->x_rs6000_isa_flags;
36782 cur_opt = TREE_TARGET_OPTION (cur_tree);
36783 cur_flags = cur_opt->x_rs6000_isa_flags;
36784 cur_bumask = cur_opt->x_rs6000_builtin_mask;
/* XOR gives exactly the bits that changed in either direction.  */
36786 diff_bumask = (prev_bumask ^ cur_bumask);
36787 diff_flags = (prev_flags ^ cur_flags);
36789 if ((diff_flags != 0) || (diff_bumask != 0))
36791 /* Delete old macros. */
36792 rs6000_target_modify_macros_ptr (false,
36793 prev_flags & diff_flags,
36794 prev_bumask & diff_bumask);
36796 /* Define new macros. */
36797 rs6000_target_modify_macros_ptr (true,
36798 cur_flags & diff_flags,
36799 cur_bumask & diff_bumask);
36807 /* Remember the last target of rs6000_set_current_function. */
/* GTY-marked so the GC does not collect the cached fndecl; cleared by
   rs6000_activate_target_options below.  */
36808 static GTY(()) tree rs6000_previous_fndecl;
36810 /* Restore target's globals from NEW_TREE and invalidate the
36811 rs6000_previous_fndecl cache. */
/* NOTE(review): listing is decimated -- return type, braces and at least one
   statement (between 36820 and 36822) are missing; verify against full file.  */
36814 rs6000_activate_target_options (tree new_tree)
36816 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
/* Prefer the cached per-node globals; the default node uses the shared
   default_target_globals instead of allocating its own.  */
36817 if (TREE_TARGET_GLOBALS (new_tree))
36818 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36819 else if (new_tree == target_option_default_node)
36820 restore_target_globals (&default_target_globals);
36822 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
/* Force rs6000_set_current_function to re-evaluate on its next call.  */
36823 rs6000_previous_fndecl = NULL_TREE;
36826 /* Establish appropriate back-end context for processing the function
36827 FNDECL. The argument might be NULL to indicate processing at top
36828 level, outside of any function scope. */
/* NOTE(review): listing is decimated -- return type, braces, and the
   declarations of old_tree/new_tree are missing; verify against full file.  */
36830 rs6000_set_current_function (tree fndecl)
36832 if (TARGET_DEBUG_TARGET)
36834 fprintf (stderr, "\n==================== rs6000_set_current_function");
36837 fprintf (stderr, ", fndecl %s (%p)",
36838 (DECL_NAME (fndecl)
36839 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36840 : "<unknown>"), (void *)fndecl);
36842 if (rs6000_previous_fndecl)
36843 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36845 fprintf (stderr, "\n");
36848 /* Only change the context if the function changes. This hook is called
36849 several times in the course of compiling a function, and we don't want to
36850 slow things down too much or call target_reinit when it isn't safe. */
36851 if (fndecl == rs6000_previous_fndecl)
/* Determine the option node that is currently in effect.  */
36855 if (rs6000_previous_fndecl == NULL_TREE)
36856 old_tree = target_option_current_node;
36857 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
36858 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
36860 old_tree = target_option_default_node;
/* Determine the option node the new context should use.  NULL fndecl means
   we are back at top level.  */
36863 if (fndecl == NULL_TREE)
36865 if (old_tree != target_option_current_node)
36866 new_tree = target_option_current_node;
36868 new_tree = NULL_TREE;
36872 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36873 if (new_tree == NULL_TREE)
36874 new_tree = target_option_default_node;
36877 if (TARGET_DEBUG_TARGET)
36881 fprintf (stderr, "\nnew fndecl target specific options:\n");
36882 debug_tree (new_tree);
36887 fprintf (stderr, "\nold fndecl target specific options:\n");
36888 debug_tree (old_tree);
36891 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
36892 fprintf (stderr, "--------------------\n");
/* Only reconfigure the backend when the options actually differ.  */
36895 if (new_tree && old_tree != new_tree)
36896 rs6000_activate_target_options (new_tree);
36899 rs6000_previous_fndecl = fndecl;
36903 /* Save the current options */
/* Copy the rs6000 ISA flags (and which were explicitly set) from the live
   option set OPTS into the cl_target_option blob PTR.  Inverse of
   rs6000_function_specific_restore below.  NOTE(review): return-type line and
   braces are missing from this decimated listing.  */
36906 rs6000_function_specific_save (struct cl_target_option *ptr,
36907 struct gcc_options *opts)
36909 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36910 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36913 /* Restore the current options */
/* Copy the saved ISA flags from PTR back into OPTS, then rerun the option
   override machinery so derived state is recomputed.  NOTE(review):
   return-type line and braces are missing from this decimated listing.  */
36916 rs6000_function_specific_restore (struct gcc_options *opts,
36917 struct cl_target_option *ptr)
36920 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36921 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
/* Result deliberately ignored; side effect is recomputing global state.  */
36922 (void) rs6000_option_override_internal (false);
36925 /* Print the current options */
/* Dump both the effective and the explicitly-requested ISA flag sets from
   PTR to FILE at the given INDENT, one labelled line each.  NOTE(review):
   return-type line and braces are missing from this decimated listing.  */
36928 rs6000_function_specific_print (FILE *file, int indent,
36929 struct cl_target_option *ptr)
36931 rs6000_print_isa_options (file, indent, "Isa options set",
36932 ptr->x_rs6000_isa_flags);
36934 rs6000_print_isa_options (file, indent, "Isa options explicit",
36935 ptr->x_rs6000_isa_flags_explicit);
36938 /* Helper function to print the current isa or misc options on a line. */
/* Print STRING followed by each option named in OPTS[0..NUM_ELEMENTS) whose
   bit is relevant in FLAGS, each prefixed by PREFIX (e.g. "-m") and "no-"
   when negated, wrapping output at max_column with a "\" continuation.
   NOTE(review): listing is decimated -- return type, braces, several locals
   (i, cur_column) and parts of the invert/no_str logic are missing; note some
   output goes to FILE and some to stderr in the visible lines -- verify that
   asymmetry against the full source before changing it.  */
36941 rs6000_print_options_internal (FILE *file,
36943 const char *string,
36944 HOST_WIDE_INT flags,
36945 const char *prefix,
36946 const struct rs6000_opt_mask *opts,
36947 size_t num_elements)
36950 size_t start_column = 0;
36952 size_t max_column = 120;
36953 size_t prefix_len = strlen (prefix);
/* comma/comma_len start empty so the first item prints without a ", ".  */
36954 size_t comma_len = 0;
36955 const char *comma = "";
36958 start_column += fprintf (file, "%*s", indent, "");
36962 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
36966 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
36968 /* Print the various mask options. */
36969 cur_column = start_column;
36970 for (i = 0; i < num_elements; i++)
36972 bool invert = opts[i].invert;
36973 const char *name = opts[i].name;
36974 const char *no_str = "";
36975 HOST_WIDE_INT mask = opts[i].mask;
36976 size_t len = comma_len + prefix_len + strlen (name);
/* Account for the "no-" prefix (sizeof-1 == strlen of the literal) when the
   option will be printed in negated form.  */
36980 if ((flags & mask) == 0)
36983 len += sizeof ("no-") - 1;
36991 if ((flags & mask) != 0)
36994 len += sizeof ("no-") - 1;
/* Wrap before exceeding the line limit, re-indenting to start_column.  */
37001 if (cur_column > max_column)
37003 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
37004 cur_column = start_column + len;
37008 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37010 comma_len = sizeof (", ") - 1;
37013 fputs ("\n", file);
37016 /* Helper function to print the current isa options on a line. */
/* Thin wrapper: prints FLAGS against the rs6000_opt_masks table with the
   "-m" switch prefix.  NOTE(review): return-type line and braces are missing
   from this decimated listing.  */
37019 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37020 HOST_WIDE_INT flags)
37022 rs6000_print_options_internal (file, indent, string, flags, "-m",
37023 &rs6000_opt_masks[0],
37024 ARRAY_SIZE (rs6000_opt_masks));
/* Print the builtin mask bits in FLAGS using the rs6000_builtin_mask_names
   table; no switch prefix since these are not command-line options.
   NOTE(review): the leading comment, return type and braces are missing from
   this decimated listing.  */
37028 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37029 HOST_WIDE_INT flags)
37031 rs6000_print_options_internal (file, indent, string, flags, "",
37032 &rs6000_builtin_mask_names[0],
37033 ARRAY_SIZE (rs6000_builtin_mask_names));
37036 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
37037 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37038 -mupper-regs-df, etc.).
37040 If the user used -mno-power8-vector, we need to turn off all of the implicit
37041 ISA 2.07 and 3.0 options that relate to the vector unit.
37043 If the user used -mno-power9-vector, we need to turn off all of the implicit
37044 ISA 3.0 options that relate to the vector unit.
37046 This function does not handle explicit options such as the user specifying
37047 -mdirect-move. These are handled in rs6000_option_override_internal, and
37048 the appropriate error is given if needed.
37050 We return a mask of all of the implicit options that should not be enabled
/* NOTE(review): listing is decimated -- the end of the comment, braces, the
   loop index declarations and the table's closing brace are missing.  */
37053 static HOST_WIDE_INT
37054 rs6000_disable_incompatible_switches (void)
37056 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
/* Table ordered from most- to least-specific so power9 deps are stripped
   before power8's, which are stripped before vsx's.  */
37059 static const struct {
37060 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
37061 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
37062 const char *const name; /* name of the switch. */
37064 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
37065 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
37066 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
37069 for (i = 0; i < ARRAY_SIZE (flags); i++)
37071 HOST_WIDE_INT no_flag = flags[i].no_flag;
/* Only act when the user explicitly turned the option OFF.  */
37073 if ((rs6000_isa_flags & no_flag) == 0
37074 && (rs6000_isa_flags_explicit & no_flag) != 0)
37076 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
37077 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
/* Report every explicitly-enabled dependent option that conflicts, clearing
   each bit as it is diagnosed so the assert below proves full coverage.  */
37083 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
37084 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
37086 set_flags &= ~rs6000_opt_masks[j].mask;
37087 error ("%<-mno-%s%> turns off %<-m%s%>",
37089 rs6000_opt_masks[j].name);
37092 gcc_assert (!set_flags);
/* Implicitly disable the dependents and mark them ignorable.  */
37095 rs6000_isa_flags &= ~dep_flags;
37096 ignore_masks |= no_flag | dep_flags;
37100 return ignore_masks;
37104 /* Helper function for printing the function name when debugging. */
/* Return FN's identifier string, or "<no-name>" for anonymous decls.
   NOTE(review): listing is decimated -- braces and the declaration of
   `name` (and likely a NULL-FN guard) are missing.  */
37106 static const char *
37107 get_decl_name (tree fn)
37114 name = DECL_NAME (fn);
37116 return "<no-name>";
37118 return IDENTIFIER_POINTER (name);
37121 /* Return the clone id of the target we are compiling code for in a target
37122 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37123 the priority list for the target clones (ordered from lowest to
/* NOTE(review): listing is decimated -- end of comment, return type, braces
   and the guard around reading the "target" attribute are missing.  */
37127 rs6000_clone_priority (tree fndecl)
37129 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37130 HOST_WIDE_INT isa_masks;
37131 int ret = CLONE_DEFAULT;
37132 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37133 const char *attrs_str = NULL;
37135 attrs = TREE_VALUE (TREE_VALUE (attrs));
37136 attrs_str = TREE_STRING_POINTER (attrs);
37138 /* Return priority zero for default function. Return the ISA needed for the
37139 function if it is not the default. */
37140 if (strcmp (attrs_str, "default") != 0)
37142 if (fn_opts == NULL_TREE)
37143 fn_opts = target_option_default_node;
37145 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37146 isa_masks = rs6000_isa_flags;
37148 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
/* Scan from highest clone priority down; first ISA match wins.  */
37150 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37151 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37155 if (TARGET_DEBUG_TARGET)
37156 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
37157 get_decl_name (fndecl), ret);
37162 /* This compares the priority of target features in function DECL1 and DECL2.
37163 It returns positive value if DECL1 is higher priority, negative value if
37164 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37165 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
/* NOTE(review): return type, braces and the return statement are missing
   from this decimated listing.  */
37168 rs6000_compare_version_priority (tree decl1, tree decl2)
37170 int priority1 = rs6000_clone_priority (decl1);
37171 int priority2 = rs6000_clone_priority (decl2);
/* Simple subtraction is safe: priorities are small non-negative ints.  */
37172 int ret = priority1 - priority2;
37174 if (TARGET_DEBUG_TARGET)
37175 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37176 get_decl_name (decl1), get_decl_name (decl2), ret);
37181 /* Make a dispatcher declaration for the multi-versioned function DECL.
37182 Calls to DECL function will be replaced with calls to the dispatcher
37183 by the front-end. Returns the decl of the dispatcher function. */
/* NOTE(review): listing is decimated -- return type, braces, the #else/#endif
   of the HWCAP guard, the first_v initialization from node_v, and several
   closing statements are missing; verify against the full source.  */
37186 rs6000_get_function_versions_dispatcher (void *decl)
37188 tree fn = (tree) decl;
37189 struct cgraph_node *node = NULL;
37190 struct cgraph_node *default_node = NULL;
37191 struct cgraph_function_version_info *node_v = NULL;
37192 struct cgraph_function_version_info *first_v = NULL;
37194 tree dispatch_decl = NULL;
37196 struct cgraph_function_version_info *default_version_info = NULL;
37197 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37199 if (TARGET_DEBUG_TARGET)
37200 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37201 get_decl_name (fn));
37203 node = cgraph_node::get (fn);
37204 gcc_assert (node != NULL);
37206 node_v = node->function_version ();
37207 gcc_assert (node_v != NULL);
/* Reuse an already-created resolver if one exists.  */
37209 if (node_v->dispatcher_resolver != NULL)
37210 return node_v->dispatcher_resolver;
37212 /* Find the default version and make it the first node. */
37214 /* Go to the beginning of the chain. */
37215 while (first_v->prev != NULL)
37216 first_v = first_v->prev;
37218 default_version_info = first_v;
37219 while (default_version_info != NULL)
37221 const tree decl2 = default_version_info->this_node->decl;
37222 if (is_function_default_version (decl2))
37224 default_version_info = default_version_info->next;
37227 /* If there is no default node, just return NULL. */
37228 if (default_version_info == NULL)
37231 /* Make default info the first node. */
/* Unlink default_version_info from its position and splice it in before
   first_v so the default version heads the chain.  */
37232 if (first_v != default_version_info)
37234 default_version_info->prev->next = default_version_info->next;
37235 if (default_version_info->next)
37236 default_version_info->next->prev = default_version_info->prev;
37237 first_v->prev = default_version_info;
37238 default_version_info->next = first_v;
37239 default_version_info->prev = NULL;
37242 default_node = default_version_info->this_node;
37244 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
/* Without glibc's TCB hwcap support __builtin_cpu_supports cannot work, so
   target_clones cannot be dispatched.  */
37245 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37246 "target_clones attribute needs GLIBC (2.23 and newer) that "
37247 "exports hardware capability bits");
37250 if (targetm.has_ifunc_p ())
37252 struct cgraph_function_version_info *it_v = NULL;
37253 struct cgraph_node *dispatcher_node = NULL;
37254 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37256 /* Right now, the dispatching is done via ifunc. */
37257 dispatch_decl = make_dispatcher_decl (default_node->decl);
37259 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37260 gcc_assert (dispatcher_node != NULL);
37261 dispatcher_node->dispatcher_function = 1;
37262 dispatcher_version_info
37263 = dispatcher_node->insert_new_function_version ();
37264 dispatcher_version_info->next = default_version_info;
37265 dispatcher_node->definition = 1;
37267 /* Set the dispatcher for all the versions. */
37268 it_v = default_version_info;
37269 while (it_v != NULL)
37271 it_v->dispatcher_resolver = dispatch_decl;
37277 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37278 "multiversioning needs ifunc which is not supported "
37283 return dispatch_decl;
37286 /* Make the resolver function decl to dispatch the versions of a multi-
37287 versioned function, DEFAULT_DECL. Create an empty basic block in the
37288 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
/* NOTE(review): listing is decimated -- end of comment, return type, braces,
   pop_cfun and the final return are missing; verify against the full file.  */
37292 make_resolver_func (const tree default_decl,
37293 const tree dispatch_decl,
37294 basic_block *empty_bb)
37296 /* Make the resolver function static. The resolver function returns
/* The resolver has signature `void *resolver(void)` -- it returns the
   address of the selected version.  */
37298 tree decl_name = clone_function_name (default_decl, "resolver");
37299 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
37300 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37301 tree decl = build_fn_decl (resolver_name, type);
37302 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37304 DECL_NAME (decl) = decl_name;
37305 TREE_USED (decl) = 1;
37306 DECL_ARTIFICIAL (decl) = 1;
37307 DECL_IGNORED_P (decl) = 0;
/* Local (non-public) and never inlined into callers.  */
37308 TREE_PUBLIC (decl) = 0;
37309 DECL_UNINLINABLE (decl) = 1;
37311 /* Resolver is not external, body is generated. */
37312 DECL_EXTERNAL (decl) = 0;
37313 DECL_EXTERNAL (dispatch_decl) = 0;
37315 DECL_CONTEXT (decl) = NULL_TREE;
37316 DECL_INITIAL (decl) = make_node (BLOCK);
37317 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37319 /* Build result decl and add to function_decl. */
37320 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37321 DECL_CONTEXT (t) = decl;
37322 DECL_ARTIFICIAL (t) = 1;
37323 DECL_IGNORED_P (t) = 1;
37324 DECL_RESULT (decl) = t;
37326 gimplify_function_tree (decl);
37327 push_cfun (DECL_STRUCT_FUNCTION (decl));
37328 *empty_bb = init_lowered_empty_function (decl, false,
37329 profile_count::uninitialized ());
37331 cgraph_node::add_new_function (decl, true);
37332 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37336 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37337 DECL_ATTRIBUTES (dispatch_decl)
37338 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37340 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37345 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37346 return a pointer to VERSION_DECL if we are running on a machine that
37347 supports the index CLONE_ISA hardware architecture bits. This function will
37348 be called during version dispatch to decide which function version to
37349 execute. It returns the basic block at the end, to which more conditions
/* NOTE(review): listing is decimated -- end of comment, return type, braces,
   pop_cfun calls and the return statements (returning new_bb / bb3) are
   missing; verify against the full source.  */
37353 add_condition_to_bb (tree function_decl, tree version_decl,
37354 int clone_isa, basic_block new_bb)
37356 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37358 gcc_assert (new_bb != NULL);
37359 gimple_seq gseq = bb_seq (new_bb);
/* result_var = (void *) &version_decl; return result_var;  */
37362 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37363 build_fold_addr_expr (version_decl));
37364 tree result_var = create_tmp_var (ptr_type_node);
37365 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37366 gimple *return_stmt = gimple_build_return (result_var);
/* The default clone needs no runtime test: return it unconditionally.  */
37368 if (clone_isa == CLONE_DEFAULT)
37370 gimple_seq_add_stmt (&gseq, convert_stmt);
37371 gimple_seq_add_stmt (&gseq, return_stmt);
37372 set_bb_seq (new_bb, gseq);
37373 gimple_set_bb (convert_stmt, new_bb);
37374 gimple_set_bb (return_stmt, new_bb);
/* Otherwise guard the return with __builtin_cpu_supports ("<isa>").  */
37379 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37380 tree cond_var = create_tmp_var (bool_int_type_node);
37381 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37382 const char *arg_str = rs6000_clone_map[clone_isa].name;
37383 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37384 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37385 gimple_call_set_lhs (call_cond_stmt, cond_var);
37387 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37388 gimple_set_bb (call_cond_stmt, new_bb);
37389 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37391 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37392 NULL_TREE, NULL_TREE);
37393 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37394 gimple_set_bb (if_else_stmt, new_bb);
37395 gimple_seq_add_stmt (&gseq, if_else_stmt);
37397 gimple_seq_add_stmt (&gseq, convert_stmt);
37398 gimple_seq_add_stmt (&gseq, return_stmt);
37399 set_bb_seq (new_bb, gseq);
/* Split into bb1 (test) -> bb2 (return version) -> bb3 (next condition);
   the cond's true edge goes to bb2, false edge falls through to bb3.  */
37401 basic_block bb1 = new_bb;
37402 edge e12 = split_block (bb1, if_else_stmt);
37403 basic_block bb2 = e12->dest;
37404 e12->flags &= ~EDGE_FALLTHRU;
37405 e12->flags |= EDGE_TRUE_VALUE;
37407 edge e23 = split_block (bb2, return_stmt);
37408 gimple_set_bb (convert_stmt, bb2);
37409 gimple_set_bb (return_stmt, bb2);
37411 basic_block bb3 = e23->dest;
37412 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37415 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37421 /* This function generates the dispatch function for multi-versioned functions.
37422 DISPATCH_DECL is the function which will contain the dispatch logic.
37423 FNDECLS are the function choices for dispatch, and is a tree chain.
37424 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37425 code is generated. */
/* NOTE(review): listing is decimated -- return type, the fndecls_p parameter
   line, braces, the ix/ele declarations and parts of the final loop body are
   missing; verify against the full source.  */
37428 dispatch_function_versions (tree dispatch_decl,
37430 basic_block *empty_bb)
37434 vec<tree> *fndecls;
37435 tree clones[CLONE_MAX];
37437 if (TARGET_DEBUG_TARGET)
37438 fputs ("dispatch_function_versions, top\n", stderr);
37440 gcc_assert (dispatch_decl != NULL
37441 && fndecls_p != NULL
37442 && empty_bb != NULL);
37444 /* fndecls_p is actually a vector. */
37445 fndecls = static_cast<vec<tree> *> (fndecls_p);
37447 /* At least one more version other than the default. */
37448 gcc_assert (fndecls->length () >= 2);
37450 /* The first version in the vector is the default decl. */
37451 memset ((void *) clones, '\0', sizeof (clones));
37452 clones[CLONE_DEFAULT] = (*fndecls)[0];
37454 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37455 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37456 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
37457 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37458 to insert the code here to do the call. */
/* Bucket each non-default version by its clone priority; first decl for a
   given priority wins.  */
37460 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37462 int priority = rs6000_clone_priority (ele);
37463 if (!clones[priority])
37464 clones[priority] = ele;
/* Emit the runtime tests from highest priority down to the default.  */
37467 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37470 if (TARGET_DEBUG_TARGET)
37471 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37472 ix, get_decl_name (clones[ix]));
37474 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37481 /* Generate the dispatching code body to dispatch multi-versioned function
37482 DECL. The target hook is called to process the "target" attributes and
37483 provide the code to dispatch the right function at run-time. NODE points
37484 to the dispatcher decl whose body will be created. */
/* NOTE(review): listing is decimated -- return type, braces, the `resolver`
   declaration, pop_cfun and the final return are missing; verify against the
   full source.  */
37487 rs6000_generate_version_dispatcher_body (void *node_p)
37490 basic_block empty_bb;
37491 struct cgraph_node *node = (cgraph_node *) node_p;
37492 struct cgraph_function_version_info *ninfo = node->function_version ();
/* Already generated once; reuse the cached resolver.  */
37494 if (ninfo->dispatcher_resolver)
37495 return ninfo->dispatcher_resolver;
37497 /* node is going to be an alias, so remove the finalized bit. */
37498 node->definition = false;
37500 /* The first version in the chain corresponds to the default version. */
37501 ninfo->dispatcher_resolver = resolver
37502 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37504 if (TARGET_DEBUG_TARGET)
37505 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
37506 get_decl_name (resolver));
37508 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37509 auto_vec<tree, 2> fn_ver_vec;
/* Collect every version's decl; the dispatcher will test them in turn.  */
37511 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37513 vinfo = vinfo->next)
37515 struct cgraph_node *version = vinfo->this_node;
37516 /* Check for virtual functions here again, as by this time it should
37517 have been determined if this function needs a vtable index or
37518 not. This happens for methods in derived classes that override
37519 virtual methods in base classes but are not explicitly marked as
37521 if (DECL_VINDEX (version->decl))
37522 sorry ("Virtual function multiversioning not supported");
37524 fn_ver_vec.safe_push (version->decl);
37527 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37528 cgraph_edge::rebuild_edges ();
37534 /* Hook to determine if one function can safely inline another. */
/* NOTE(review): listing is decimated -- return type, braces, the `ret`
   declaration/assignments and the final return are missing.  */
37537 rs6000_can_inline_p (tree caller, tree callee)
37540 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37541 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37543 /* If callee has no option attributes, then it is ok to inline. */
37547 /* If caller has no option attributes, but callee does then it is not ok to
37549 else if (!caller_tree)
37554 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37555 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37557 /* Callee's options should a subset of the caller's, i.e. a vsx function
37558 can inline an altivec function but a non-vsx function can't inline a
/* Subset test: AND the two masks and compare against the callee's.  */
37560 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37561 == callee_opts->x_rs6000_isa_flags)
37565 if (TARGET_DEBUG_TARGET)
37566 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
37567 get_decl_name (caller), get_decl_name (callee),
37568 (ret ? "can" : "cannot"));
37573 /* Allocate a stack temp and fixup the address so it meets the particular
37574 memory requirements (either offetable or REG+REG addressing). */
/* NOTE(review): listing is decimated -- return type, the reg_reg_p parameter
   line, braces and the final `return stack;` are missing.  */
37577 rs6000_allocate_stack_temp (machine_mode mode,
37578 bool offsettable_p,
37581 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37582 rtx addr = XEXP (stack, 0);
/* Use strict checking only after reload has assigned hard registers.  */
37583 int strict_p = reload_completed;
37585 if (!legitimate_indirect_address_p (addr, strict_p))
37588 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37589 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37591 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37592 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37598 /* Given a memory reference, if it is not a reg or reg+reg addressing,
37599 convert to such a form to deal with memory reference instructions
37600 like STFIWX and LDBRX that only take reg+reg addressing. */
/* NOTE(review): listing is decimated -- return type, braces and the final
   `return x;` are missing; verify against the full source.  */
37603 rs6000_force_indexed_or_indirect_mem (rtx x)
37605 machine_mode mode = GET_MODE (x);
37607 gcc_assert (MEM_P (x));
37608 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
37610 rtx addr = XEXP (x, 0);
/* Expand auto-inc/dec addressing into an explicit pointer update so the
   resulting address is a plain register.  */
37611 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37613 rtx reg = XEXP (addr, 0);
37614 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37615 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37616 gcc_assert (REG_P (reg));
37617 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37620 else if (GET_CODE (addr) == PRE_MODIFY)
37622 rtx reg = XEXP (addr, 0);
37623 rtx expr = XEXP (addr, 1);
37624 gcc_assert (REG_P (reg));
37625 gcc_assert (GET_CODE (expr) == PLUS);
37626 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
/* Force whatever address remains into a register (reg-indirect form).  */
37630 x = replace_equiv_address (x, force_reg (Pmode, addr));
37636 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37638 On the RS/6000, all integer constants are acceptable, most won't be valid
37639 for particular insns, though. Only easy FP constants are acceptable. */
/* NOTE(review): listing is decimated -- return type, braces, the TLS-case
   return value and the final default return are missing.  */
37642 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
/* TLS symbols need a runtime address computation under ELF.  */
37644 if (TARGET_ELF && tls_referenced_p (x))
37647 if (CONST_DOUBLE_P (x))
37648 return easy_fp_constant (x, mode);
37650 if (GET_CODE (x) == CONST_VECTOR)
37651 return easy_vector_constant (x, mode);
37657 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
/* Walks backwards over the insn list looking for a SET whose destination is
   the static-chain hard register.  NOTE(review): return type, braces and the
   return statements are missing from this decimated listing.  */
37660 chain_already_loaded (rtx_insn *last)
37662 for (; last != NULL; last = PREV_INSN (last))
37664 if (NONJUMP_INSN_P (last))
37666 rtx patt = PATTERN (last);
37668 if (GET_CODE (patt) == SET)
37670 rtx lhs = XEXP (patt, 0);
37672 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37680 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
/* NOTE(review): listing is decimated -- return type, braces, several locals
   (insn, call[], n_call, func_addr) and a number of statements/else-branches
   are missing; verify against the full rs6000.c before changing anything.  */
37683 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37685 rtx func = func_desc;
37686 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37687 rtx toc_load = NULL_RTX;
37688 rtx toc_restore = NULL_RTX;
37690 rtx abi_reg = NULL_RTX;
37694 bool is_pltseq_longcall;
37697 tlsarg = global_tlsarg;
37699 /* Handle longcall attributes. */
37700 is_pltseq_longcall = false;
37701 if ((INTVAL (cookie) & CALL_LONG) != 0
37702 && GET_CODE (func_desc) == SYMBOL_REF)
37704 func = rs6000_longcall_ref (func_desc, tlsarg);
37706 is_pltseq_longcall = true;
37709 /* Handle indirect calls. */
37710 if (!SYMBOL_REF_P (func)
37711 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
37713 /* Save the TOC into its reserved slot before the call,
37714 and prepare to restore it after the call. */
37715 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37716 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37717 gen_rtvec (1, stack_toc_offset),
37719 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37721 /* Can we optimize saving the TOC in the prologue or
37722 do we need to do it at every call? */
37723 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37724 cfun->machine->save_toc_in_prologue = true;
37727 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37728 rtx stack_toc_mem = gen_frame_mem (Pmode,
37729 gen_rtx_PLUS (Pmode, stack_ptr,
37730 stack_toc_offset));
/* Volatile so the TOC save is not deleted or moved across the call.  */
37731 MEM_VOLATILE_P (stack_toc_mem) = 1;
37732 if (is_pltseq_longcall)
37734 /* Use USPEC_PLTSEQ here to emit every instruction in an
37735 inline PLT call sequence with a reloc, enabling the
37736 linker to edit the sequence back to a direct call
37737 when that makes sense. */
37738 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
37739 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37740 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
37743 emit_move_insn (stack_toc_mem, toc_reg);
37746 if (DEFAULT_ABI == ABI_ELFv2)
37748 /* A function pointer in the ELFv2 ABI is just a plain address, but
37749 the ABI requires it to be loaded into r12 before the call. */
37750 func_addr = gen_rtx_REG (Pmode, 12);
37751 if (!rtx_equal_p (func_addr, func))
37752 emit_move_insn (func_addr, func);
37753 abi_reg = func_addr;
37754 /* Indirect calls via CTR are strongly preferred over indirect
37755 calls via LR, so move the address there. Needed to mark
37756 this insn for linker plt sequence editing too. */
37757 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
37758 if (is_pltseq_longcall)
37760 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
37761 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37762 emit_insn (gen_rtx_SET (func_addr, mark_func));
37763 v = gen_rtvec (2, func_addr, func_desc);
37764 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37767 emit_move_insn (func_addr, abi_reg);
37771 /* A function pointer under AIX is a pointer to a data area whose
37772 first word contains the actual address of the function, whose
37773 second word contains a pointer to its TOC, and whose third word
37774 contains a value to place in the static chain register (r11).
37775 Note that if we load the static chain, our "trampoline" need
37776 not have any executable code. */
37778 /* Load up address of the actual function. */
37779 func = force_reg (Pmode, func);
37780 func_addr = gen_reg_rtx (Pmode);
37781 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
37783 /* Indirect calls via CTR are strongly preferred over indirect
37784 calls via LR, so move the address there. */
37785 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
37786 emit_move_insn (ctr_reg, func_addr);
37787 func_addr = ctr_reg;
37789 /* Prepare to load the TOC of the called function. Note that the
37790 TOC load must happen immediately before the actual call so
37791 that unwinding the TOC registers works correctly. See the
37792 comment in frob_update_context. */
37793 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37794 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37795 gen_rtx_PLUS (Pmode, func,
37797 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37799 /* If we have a static chain, load it up. But, if the call was
37800 originally direct, the 3rd word has not been written since no
37801 trampoline has been built, so we ought not to load it, lest we
37802 override a static chain value. */
37803 if (!(GET_CODE (func_desc) == SYMBOL_REF
37804 && SYMBOL_REF_FUNCTION_P (func_desc))
37805 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37806 && !chain_already_loaded (get_current_sequence ()->next->last))
37808 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37809 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37810 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37811 gen_rtx_PLUS (Pmode, func,
37813 emit_move_insn (sc_reg, func_sc_mem);
37820 /* Direct calls use the TOC: for local calls, the callee will
37821 assume the TOC register is set; for non-local calls, the
37822 PLT stub needs the TOC register. */
37827 /* Create the call. */
/* Build a PARALLEL of the call, optional TOC load/restore, and the LR
   clobber, then emit it as a single call insn.  */
37828 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
37829 if (value != NULL_RTX)
37830 call[0] = gen_rtx_SET (value, call[0]);
37834 call[n_call++] = toc_load;
37836 call[n_call++] = toc_restore;
37838 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
37840 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37841 insn = emit_call_insn (insn);
37843 /* Mention all registers defined by the ABI to hold information
37844 as uses in CALL_INSN_FUNCTION_USAGE. */
37846 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37849 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
/* NOTE(review): listing is decimated -- return type, braces and the
   declarations of call[]/insn are missing; verify against the full source.  */
37852 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
/* Sibcalls never carry the CALL_LONG etc. flags here.  */
37857 gcc_assert (INTVAL (cookie) == 0);
37860 tlsarg = global_tlsarg;
37862 /* Create the call. */
37863 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
37864 if (value != NULL_RTX)
37865 call[0] = gen_rtx_SET (value, call[0]);
/* A sibling call returns straight to our caller.  */
37867 call[1] = simple_return_rtx;
37869 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37870 insn = emit_call_insn (insn);
37872 /* Note use of the TOC register. */
37873 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37876 /* Expand code to perform a call under the SYSV4 ABI.
   VALUE receives the call's return value (or NULL_RTX), FUNC_DESC is the
   callee, TLSARG is an optional TLS argument, and COOKIE carries flags
   such as CALL_LONG.
   NOTE(review): this excerpt elides lines (braces, declarations, some
   guards and else-arms) present in the full rs6000.c.  */
37879 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37881 rtx func = func_desc;
37885 rtx abi_reg = NULL_RTX;
/* Presumably guarded upstream by a test of global_tlsarg — TODO confirm.  */
37889 tlsarg = global_tlsarg;
37891 /* Handle longcall attributes. */
37892 if ((INTVAL (cookie) & CALL_LONG) != 0
37893 && GET_CODE (func_desc) == SYMBOL_REF)
37895 func = rs6000_longcall_ref (func_desc, tlsarg);
37896 /* If the longcall was implemented as an inline PLT call using
37897 PLT unspecs then func will be REG:r11. If not, func will be
37898 a pseudo reg. The inline PLT call sequence supports lazy
37899 linking (and longcalls to functions in dlopen'd libraries).
37900 The other style of longcalls don't. The lazy linking entry
37901 to the dynamic symbol resolver requires r11 be the function
37902 address (as it is for linker generated PLT stubs). Ensure
37903 r11 stays valid to the bctrl by marking r11 used by the call. */
37908 /* Handle indirect calls. */
37909 if (GET_CODE (func) != SYMBOL_REF)
37911 func = force_reg (Pmode, func);
37913 /* Indirect calls via CTR are strongly preferred over indirect
37914 calls via LR, so move the address there. That can't be left
37915 to reload because we want to mark every instruction in an
37916 inline PLT call sequence with a reloc, enabling the linker to
37917 edit the sequence back to a direct call when that makes sense. */
37918 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
/* Wrap the address in PLTSEQ unspecs so each insn of the inline PLT
   sequence carries a marker reloc for the linker.  */
37921 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
37922 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37923 emit_insn (gen_rtx_SET (func_addr, mark_func));
37924 v = gen_rtvec (2, func_addr, func_desc);
37925 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
/* Plain indirect call: just move the address into CTR.  */
37928 emit_move_insn (func_addr, func);
37933 /* Create the call. */
37934 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
37935 if (value != NULL_RTX)
37936 call[0] = gen_rtx_SET (value, call[0]);
/* Record the ABI cookie so later passes can see the call's flags.  */
37938 call[1] = gen_rtx_USE (VOIDmode, cookie);
/* Secure-PLT non-local calls need the PIC register live at the call.  */
37940 if (TARGET_SECURE_PLT
37942 && GET_CODE (func_addr) == SYMBOL_REF
37943 && !SYMBOL_REF_LOCAL_P (func_addr))
37944 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
/* The call clobbers the link register.  */
37946 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
37948 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
37949 insn = emit_call_insn (insn);
/* Presumably guarded upstream by a null check of abi_reg — TODO confirm.  */
37951 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37954 /* Expand code to perform a sibling call under the SysV4 ABI.
   Mirrors rs6000_call_sysv but emits a call+return (sibcall), which
   must go through CTR when indirect.
   NOTE(review): this excerpt elides lines (braces, declarations, some
   guards) present in the full rs6000.c.  */
37957 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37959 rtx func = func_desc;
37963 rtx abi_reg = NULL_RTX;
/* Presumably guarded upstream by a test of global_tlsarg — TODO confirm.  */
37966 tlsarg = global_tlsarg;
37968 /* Handle longcall attributes. */
37969 if ((INTVAL (cookie) & CALL_LONG) != 0
37970 && GET_CODE (func_desc) == SYMBOL_REF)
37972 func = rs6000_longcall_ref (func_desc, tlsarg);
37973 /* If the longcall was implemented as an inline PLT call using
37974 PLT unspecs then func will be REG:r11. If not, func will be
37975 a pseudo reg. The inline PLT call sequence supports lazy
37976 linking (and longcalls to functions in dlopen'd libraries).
37977 The other style of longcalls don't. The lazy linking entry
37978 to the dynamic symbol resolver requires r11 be the function
37979 address (as it is for linker generated PLT stubs). Ensure
37980 r11 stays valid to the bctr by marking r11 used by the call. */
37985 /* Handle indirect calls. */
37986 if (GET_CODE (func) != SYMBOL_REF)
37988 func = force_reg (Pmode, func);
37990 /* Indirect sibcalls must go via CTR. That can't be left to
37991 reload because we want to mark every instruction in an inline
37992 PLT call sequence with a reloc, enabling the linker to edit
37993 the sequence back to a direct call when that makes sense. */
37994 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
/* Mark the inline PLT sequence with PLTSEQ unspecs (see call_sysv).  */
37997 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
37998 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37999 emit_insn (gen_rtx_SET (func_addr, mark_func));
38000 v = gen_rtvec (2, func_addr, func_desc);
38001 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
/* Plain indirect sibcall: move the address into CTR.  */
38004 emit_move_insn (func_addr, func);
38009 /* Create the call. */
38010 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
38011 if (value != NULL_RTX)
38012 call[0] = gen_rtx_SET (value, call[0]);
38014 call[1] = gen_rtx_USE (VOIDmode, cookie);
/* A sibcall returns directly to our caller.  */
38015 call[2] = simple_return_rtx;
38017 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
38018 insn = emit_call_insn (insn);
/* Presumably guarded upstream by a null check of abi_reg — TODO confirm.  */
38020 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38025 /* Expand code to perform a call under the Darwin ABI.
38026 Modulo handling of mlongcall, this is much the same as sysv.
38027 if/when the longcall optimisation is removed, we could drop this
38028 code and use the sysv case (taking care to avoid the tls stuff).
38030 We can use this for sibcalls too, if needed.
   VALUE/FUNC_DESC/TLSARG/COOKIE as for the sysv expanders; SIBCALL
   selects sibcall (call + simple_return) versus normal call (LR clobber).
   NOTE(review): this excerpt elides lines (braces, else-arms, the
   make_island guard) present in the full rs6000.c.  */
38033 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
38034 rtx cookie, bool sibcall)
38036 rtx func = func_desc;
38040 int cookie_val = INTVAL (cookie);
38041 bool make_island = false;
38043 /* Handle longcall attributes, there are two cases for Darwin:
38044 1) Newer linkers are capable of synthesising any branch islands needed.
38045 2) We need a helper branch island synthesised by the compiler.
38046 The second case has mostly been retired and we don't use it for m64.
38047 In fact, it is an optimisation; we could just indirect as sysv does...
38048 ... however, backwards compatibility for now.
38049 If we're going to use this, then we need to keep the CALL_LONG bit set,
38050 so that we can pick up the special insn form later. */
38051 if ((cookie_val & CALL_LONG) != 0
38052 && GET_CODE (func_desc) == SYMBOL_REF)
38054 if (darwin_emit_branch_islands && TARGET_32BIT)
38055 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
38058 /* The linker is capable of doing this, but the user explicitly
38059 asked for -mlongcall, so we'll do the 'normal' version. */
38060 func = rs6000_longcall_ref (func_desc, NULL_RTX);
38061 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
38065 /* Handle indirect calls. */
38066 if (GET_CODE (func) != SYMBOL_REF)
38068 func = force_reg (Pmode, func);
38070 /* Indirect calls via CTR are strongly preferred over indirect
38071 calls via LR, and are required for indirect sibcalls, so move
38072 the address there. */
38073 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
38074 emit_move_insn (func_addr, func);
38079 /* Create the call. */
38080 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
38081 if (value != NULL_RTX)
38082 call[0] = gen_rtx_SET (value, call[0]);
/* Record the (possibly adjusted) cookie for later insn matching.  */
38084 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
/* Sibcall arm: return follows the call; normal arm: LR is clobbered.
   Presumably selected upstream by an if (sibcall)/else — TODO confirm.  */
38087 call[2] = simple_return_rtx;
38089 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
38091 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
38092 insn = emit_call_insn (insn);
38093 /* Now we have the debug info in the insn, we can set up the branch island
38094 if we're using one. */
38097 tree funname = get_identifier (XSTR (func_desc, 0));
38099 if (no_previous_def (funname))
38101 rtx label_rtx = gen_label_rtx ();
38102 char *label_buf, temp_buf[256];
38103 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
38104 CODE_LABEL_NUMBER (label_rtx));
/* ASM_GENERATE_INTERNAL_LABEL may prefix the name with '*'; strip it.  */
38105 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
38106 tree labelname = get_identifier (label_buf);
38107 add_compiler_branch_island (labelname, funname,
38108 insn_line ((const rtx_insn*)insn));
/* Expand a normal (non-sibling) call for Darwin; thin wrapper that
   forwards to rs6000_call_darwin_1 with SIBCALL == false.  */
38115 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
38116 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
38119 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
/* Expand a sibling call for Darwin; thin wrapper that forwards to
   rs6000_call_darwin_1 with SIBCALL == true.  */
38127 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
38128 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
38131 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
38138 /* Return whether we need to always update the saved TOC pointer when we update
38139 the stack pointer.  The flag is computed elsewhere and cached per
   function in cfun->machine; the null checks guard early calls.  */
38142 rs6000_save_toc_in_prologue_p (void)
38144 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38147 #ifdef HAVE_GAS_HIDDEN
38148 # define USE_HIDDEN_LINKONCE 1
38150 # define USE_HIDDEN_LINKONCE 0
38153 /* Fills in the label name that should be used for a 476 link stack thunk.
   NAME must have room for at least 32 bytes.  With hidden-linkonce
   support a single shared symbol is used; otherwise a per-file
   internal label is generated.  */
38156 get_ppc476_thunk_name (char name[32])
/* Only meaningful when the link-stack workaround is enabled.  */
38158 gcc_assert (TARGET_LINK_STACK);
38160 if (USE_HIDDEN_LINKONCE)
38161 sprintf (name, "__ppc476.get_thunk");
38163 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38166 /* This function emits the simple thunk routine that is used to preserve
38167 the link stack on the 476 cpu.  Runs at end of compilation
   (TARGET_ASM_CODE_END); emits the thunk either as a hidden comdat
   symbol or into the plain text section.
   NOTE(review): this excerpt elides lines (braces, local declarations,
   an early return) present in the full rs6000.c.  */
38169 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38171 rs6000_code_end (void)
/* Nothing to do unless the 476 link-stack workaround is in use.  */
38176 if (!TARGET_LINK_STACK)
38179 get_ppc476_thunk_name (name);
/* Build a bare FUNCTION_DECL for the thunk so the assembler output
   machinery can emit it like a normal function.  */
38181 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38182 build_function_type_list (void_type_node, NULL_TREE));
38183 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38184 NULL_TREE, void_type_node);
38185 TREE_PUBLIC (decl) = 1;
38186 TREE_STATIC (decl) = 1;
/* Prefer a hidden, weak, comdat copy so multiple objects share one thunk.  */
38189 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
38191 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38192 targetm.asm_out.unique_section (decl, 0);
38193 switch_to_section (get_named_section (decl, NULL, 0));
38194 DECL_WEAK (decl) = 1;
38195 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38196 targetm.asm_out.globalize_label (asm_out_file, name);
38197 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38198 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: emit a local label in the ordinary text section.  */
38203 switch_to_section (text_section);
38204 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Set up a minimal cfun so final_start_function can emit unwind info.  */
38207 DECL_INITIAL (decl) = make_node (BLOCK);
38208 current_function_decl = decl;
38209 allocate_struct_function (decl, false);
38210 init_function_start (decl);
38211 first_function_block_is_cold = false;
38212 /* Make sure unwind info is emitted for the thunk if needed. */
38213 final_start_function (emit_barrier (), asm_out_file, 1);
38215 fputs ("\tblr\n", asm_out_file);
38217 final_end_function ();
38218 init_insn_lengths ();
38219 free_after_compilation (cfun);
38221 current_function_decl = NULL;
38224 /* Add r30 to hard reg set if the prologue sets it up and it is not
38225 pic_offset_table_rtx.  Implements TARGET_SET_UP_BY_PROLOGUE: SET
   collects hard registers the prologue establishes for shrink-wrapping.  */
38228 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38230 if (!TARGET_SINGLE_PIC_BASE
38232 && TARGET_MINIMAL_TOC
38233 && !constant_pool_empty_p ())
38234 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
/* r12 holds the incoming arg pointer for -fsplit-stack functions.  */
38235 if (cfun->machine->split_stack_argp_used)
38236 add_to_hard_reg_set (&set->set, Pmode, 12);
38238 /* Make sure the hard reg set doesn't include r2, which was possibly added
38239 via PIC_OFFSET_TABLE_REGNUM. */
38241 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
38245 /* Helper function for rs6000_split_logical to emit a logical instruction after
38246 splitting the operation to single GPR registers.
38248 DEST is the destination register.
38249 OP1 and OP2 are the input source registers.
38250 CODE is the base operation (AND, IOR, XOR, NOT).
38251 MODE is the machine mode.
38252 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38253 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38254 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
   NOTE(review): this excerpt elides lines (braces, the MODE parameter
   declaration, some returns) present in the full rs6000.c.  */
38257 rs6000_split_logical_inner (rtx dest,
38260 enum rtx_code code,
38262 bool complement_final_p,
38263 bool complement_op1_p,
38264 bool complement_op2_p)
38268 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38269 if (op2 && CONST_INT_P (op2)
38270 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38271 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38273 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38274 HOST_WIDE_INT value = INTVAL (op2) & mask;
38276 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38281 emit_insn (gen_rtx_SET (dest, const0_rtx));
38285 else if (value == mask)
38287 if (!rtx_equal_p (dest, op1))
38288 emit_insn (gen_rtx_SET (dest, op1));
38293 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38294 into separate ORI/ORIS or XORI/XORIS instructions. */
38295 else if (code == IOR || code == XOR)
38299 if (!rtx_equal_p (dest, op1))
38300 emit_insn (gen_rtx_SET (dest, op1));
/* Plain SImode AND needs the andsi3 pattern (which supplies the CC
   clobber) rather than a bare SET.  */
38306 if (code == AND && mode == SImode
38307 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38309 emit_insn (gen_andsi3 (dest, op1, op2));
38313 if (complement_op1_p)
38314 op1 = gen_rtx_NOT (mode, op1);
38316 if (complement_op2_p)
38317 op2 = gen_rtx_NOT (mode, op2);
38319 /* For canonical RTL, if only one arm is inverted it is the first. */
38320 if (!complement_op1_p && complement_op2_p)
38321 std::swap (op1, op2);
38323 bool_rtx = ((code == NOT)
38324 ? gen_rtx_NOT (mode, op1)
38325 : gen_rtx_fmt_ee (code, mode, op1, op2));
38327 if (complement_final_p)
38328 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38330 emit_insn (gen_rtx_SET (dest, bool_rtx));
38333 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38334 operations are split immediately during RTL generation to allow for more
38335 optimizations of the AND/IOR/XOR.
38337 OPERANDS is an array containing the destination and two input operands.
38338 CODE is the base operation (AND, IOR, XOR, NOT).
38339 MODE is the machine mode.
38340 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38341 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38342 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38343 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38344 formation of the AND instructions.
   NOTE(review): this excerpt elides lines (braces, the NOT special case,
   some else-arms) present in the full rs6000.c.  */
38347 rs6000_split_logical_di (rtx operands[3],
38348 enum rtx_code code,
38349 bool complement_final_p,
38350 bool complement_op1_p,
38351 bool complement_op2_p)
38353 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38354 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38355 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38356 enum hi_lo { hi = 0, lo = 1 };
38357 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
/* Split dest and op1 into their 32-bit high/low halves.  */
38360 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38361 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38362 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38363 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
/* NOT has no second operand.  */
38366 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38369 if (!CONST_INT_P (operands[2]))
38371 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38372 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
/* Constant op2: split the 64-bit value into two sign-extended halves.  */
38376 HOST_WIDE_INT value = INTVAL (operands[2]);
38377 HOST_WIDE_INT value_hi_lo[2];
/* Complement flags are only produced for register operands.  */
38379 gcc_assert (!complement_final_p);
38380 gcc_assert (!complement_op1_p);
38381 gcc_assert (!complement_op2_p);
38383 value_hi_lo[hi] = value >> 32;
38384 value_hi_lo[lo] = value & lower_32bits;
38386 for (i = 0; i < 2; i++)
38388 HOST_WIDE_INT sub_value = value_hi_lo[i];
/* Sign-extend the 32-bit half into the host-wide value.  */
38390 if (sub_value & sign_bit)
38391 sub_value |= upper_32bits;
38393 op2_hi_lo[i] = GEN_INT (sub_value);
38395 /* If this is an AND instruction, check to see if we need to load
38396 the value in a register. */
38397 if (code == AND && sub_value != -1 && sub_value != 0
38398 && !and_operand (op2_hi_lo[i], SImode))
38399 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
/* Emit one SImode operation per half.  */
38404 for (i = 0; i < 2; i++)
38406 /* Split large IOR/XOR operations. */
38407 if ((code == IOR || code == XOR)
38408 && CONST_INT_P (op2_hi_lo[i])
38409 && !complement_final_p
38410 && !complement_op1_p
38411 && !complement_op2_p
38412 && !logical_const_operand (op2_hi_lo[i], SImode))
38414 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38415 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38416 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38417 rtx tmp = gen_reg_rtx (SImode);
38419 /* Make sure the constant is sign extended. */
38420 if ((hi_16bits & sign_bit) != 0)
38421 hi_16bits |= upper_32bits;
/* Two instructions: ORIS/XORIS of the high 16 bits, then ORI/XORI
   of the low 16 bits.  */
38423 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38424 code, SImode, false, false, false);
38426 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38427 code, SImode, false, false, false);
38430 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38431 code, SImode, complement_final_p,
38432 complement_op1_p, complement_op2_p);
38438 /* Split the insns that make up boolean operations operating on multiple GPR
38439 registers. The boolean MD patterns ensure that the inputs either are
38440 exactly the same as the output registers, or there is no overlap.
38442 OPERANDS is an array containing the destination and two input operands.
38443 CODE is the base operation (AND, IOR, XOR, NOT).
38444 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38445 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38446 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
   NOTE(review): this excerpt elides lines (braces, op0/op1 declarations,
   a return after the DImode case) present in the full rs6000.c.  */
38449 rs6000_split_logical (rtx operands[3],
38450 enum rtx_code code,
38451 bool complement_final_p,
38452 bool complement_op1_p,
38453 bool complement_op2_p)
38455 machine_mode mode = GET_MODE (operands[0]);
38456 machine_mode sub_mode;
38458 int sub_size, regno0, regno1, nregs, i;
38460 /* If this is DImode, use the specialized version that can run before
38461 register allocation. */
38462 if (mode == DImode && !TARGET_POWERPC64)
38464 rs6000_split_logical_di (operands, code, complement_final_p,
38465 complement_op1_p, complement_op2_p);
/* NOT takes no second operand.  */
38471 op2 = (code == NOT) ? NULL_RTX : operands[2];
38472 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38473 sub_size = GET_MODE_SIZE (sub_mode);
38474 regno0 = REGNO (op0);
38475 regno1 = REGNO (op1);
/* This path runs only after reload, on hard GPRs.  */
38477 gcc_assert (reload_completed);
38478 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38479 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38481 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38482 gcc_assert (nregs > 1);
38484 if (op2 && REG_P (op2))
38485 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
/* Emit one word-mode operation per constituent register.  */
38487 for (i = 0; i < nregs; i++)
38489 int offset = i * sub_size;
38490 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38491 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38492 rtx sub_op2 = ((code == NOT)
38494 : simplify_subreg (sub_mode, op2, mode, offset));
38496 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38497 complement_final_p, complement_op1_p,
38505 /* Return true if the peephole2 can combine a load involving a combination of
38506 an addis instruction and a load with an offset that can be fused together on
   a power8 and later CPU.
38510 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38511 rtx addis_value, /* addis value. */
38512 rtx target, /* target register that is loaded. */
38513 rtx mem) /* bottom part of the memory addr. */
38518 /* Validate arguments. */
38519 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38522 if (!base_reg_operand (target, GET_MODE (target)))
38525 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38528 /* Allow sign/zero extension. */
38529 if (GET_CODE (mem) == ZERO_EXTEND
38530 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38531 mem = XEXP (mem, 0);
38536 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38539 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38540 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38543 /* Validate that the register used to load the high value is either the
38544 register being loaded, or we can safely replace its use.
38546 This function is only called from the peephole2 pass and we assume that
38547 there are 2 instructions in the peephole (addis and load), so we want to
38548 check if the target register was not used in the memory address and the
38549 register to hold the addis result is dead after the peephole. */
38550 if (REGNO (addis_reg) != REGNO (target))
38552 if (reg_mentioned_p (target, mem))
38555 if (!peep2_reg_dead_p (2, addis_reg))
38558 /* If the target register being loaded is the stack pointer, we must
38559 avoid loading any other value into it, even temporarily. */
38560 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
/* Finally, the addis result must be the base register of the load.  */
38564 base_reg = XEXP (addr, 0);
38565 return REGNO (addis_reg) == REGNO (base_reg);
38568 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38569 sequence. We adjust the addis register to use the target register. If the
38570 load sign extends, we adjust the code to do the zero extending load, and an
38571 explicit sign extension later since the fusion only covers zero extending
   loads.
38575 operands[0] register set with addis (to be replaced with target)
38576 operands[1] value set via addis
38577 operands[2] target register being loaded
38578 operands[3] D-form memory reference using operands[0].
   NOTE(review): this excerpt elides lines (braces, a sign_reg declaration)
   present in the full rs6000.c.  */
38581 expand_fusion_gpr_load (rtx *operands)
38583 rtx addis_value = operands[1];
38584 rtx target = operands[2];
38585 rtx orig_mem = operands[3];
38586 rtx new_addr, new_mem, orig_addr, offset;
38587 enum rtx_code plus_or_lo_sum;
38588 machine_mode target_mode = GET_MODE (target);
38589 machine_mode extend_mode = target_mode;
38590 machine_mode ptr_mode = Pmode;
38591 enum rtx_code extend = UNKNOWN;
/* Strip an extension wrapper; the inner mode is the actual load mode.  */
38593 if (GET_CODE (orig_mem) == ZERO_EXTEND
38594 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38596 extend = GET_CODE (orig_mem);
38597 orig_mem = XEXP (orig_mem, 0);
38598 target_mode = GET_MODE (orig_mem);
38601 gcc_assert (MEM_P (orig_mem));
38603 orig_addr = XEXP (orig_mem, 0);
38604 plus_or_lo_sum = GET_CODE (orig_addr);
38605 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
/* Rebuild the address using the addis value so the base is the target.  */
38607 offset = XEXP (orig_addr, 1);
38608 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38609 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
/* The fused load always zero-extends; sign extension is done afterwards.  */
38611 if (extend != UNKNOWN)
38612 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
/* Wrap in an UNSPEC so later passes keep the fusion pair together.  */
38614 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38615 UNSPEC_FUSION_GPR);
38616 emit_insn (gen_rtx_SET (target, new_mem));
38618 if (extend == SIGN_EXTEND)
/* Pick the low-order subword of TARGET to sign-extend from.  */
38620 int sub_off = ((BYTES_BIG_ENDIAN)
38621 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38624 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38626 emit_insn (gen_rtx_SET (target,
38627 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38633 /* Emit the addis instruction that will be part of a fused instruction
   sequence.  TARGET is the register set by the addis; ADDIS_VALUE is
   the high-part value, which may be a plain constant, a PLUS, or a
   HIGH of a TOC-relative unspec.
   NOTE(review): this excerpt elides lines (braces, some fuse_ops
   assignments, else-arms) present in the full rs6000.c.  */
38637 emit_fusion_addis (rtx target, rtx addis_value)
38640 const char *addis_str = NULL;
38642 /* Emit the addis instruction. */
38643 fuse_ops[0] = target;
/* Constant that fits the addis immediate: plain lis.  */
38644 if (satisfies_constraint_L (addis_value))
38646 fuse_ops[1] = addis_value;
38647 addis_str = "lis %0,%v1";
/* reg + L-constant: addis of the register.  */
38650 else if (GET_CODE (addis_value) == PLUS)
38652 rtx op0 = XEXP (addis_value, 0);
38653 rtx op1 = XEXP (addis_value, 1);
38655 if (REG_P (op0) && CONST_INT_P (op1)
38656 && satisfies_constraint_L (op1))
38660 addis_str = "addis %0,%1,%v2";
/* HIGH of a TOC-relative address (ELF @toc@ha / XCOFF @u forms).  */
38664 else if (GET_CODE (addis_value) == HIGH)
38666 rtx value = XEXP (addis_value, 0);
38667 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38669 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38670 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38672 addis_str = "addis %0,%2,%1@toc@ha";
38674 else if (TARGET_XCOFF)
38675 addis_str = "addis %0,%1@u(%2)";
38678 gcc_unreachable ();
/* HIGH of (TOC-relative unspec + offset).  */
38681 else if (GET_CODE (value) == PLUS)
38683 rtx op0 = XEXP (value, 0);
38684 rtx op1 = XEXP (value, 1);
38686 if (GET_CODE (op0) == UNSPEC
38687 && XINT (op0, 1) == UNSPEC_TOCREL
38688 && CONST_INT_P (op1))
38690 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38691 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38694 addis_str = "addis %0,%2,%1+%3@toc@ha";
38696 else if (TARGET_XCOFF)
38697 addis_str = "addis %0,%1+%3@u(%2)";
38700 gcc_unreachable ();
38704 else if (satisfies_constraint_L (value))
38706 fuse_ops[1] = value;
38707 addis_str = "lis %0,%v1";
/* 32-bit ELF: any constant via @ha relocation.  */
38710 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38712 fuse_ops[1] = value;
38713 addis_str = "lis %0,%1@ha";
/* No template matched: this addis value cannot be fused.  */
38718 fatal_insn ("Could not generate addis value for fusion", addis_value);
38720 output_asm_insn (addis_str, fuse_ops);
38723 /* Emit a D-form load or store instruction that is the second instruction
38724 of a fusion sequence.  LOAD_REG is the value register, ADDIS_REG the
   base set by the preceding addis, OFFSET the low part of the address,
   and INSN_STR the mnemonic (e.g. "lwz").
   NOTE(review): this excerpt elides lines (braces, else-arms) present
   in the full rs6000.c.  */
38727 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
38730 char insn_template[80];
38732 fuse_ops[0] = load_reg;
38733 fuse_ops[1] = addis_reg;
/* Simple immediate offset that fits the D field.  */
38735 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38737 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38738 fuse_ops[2] = offset;
38739 output_asm_insn (insn_template, fuse_ops);
/* TOC-relative low part (ELF @toc@l / XCOFF @l forms).  */
38742 else if (GET_CODE (offset) == UNSPEC
38743 && XINT (offset, 1) == UNSPEC_TOCREL)
38746 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38748 else if (TARGET_XCOFF)
38749 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38752 gcc_unreachable ();
38754 fuse_ops[2] = XVECEXP (offset, 0, 0);
38755 output_asm_insn (insn_template, fuse_ops);
/* TOC-relative low part plus a constant displacement.  */
38758 else if (GET_CODE (offset) == PLUS
38759 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38760 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38761 && CONST_INT_P (XEXP (offset, 1)))
38763 rtx tocrel_unspec = XEXP (offset, 0);
38765 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38767 else if (TARGET_XCOFF)
38768 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38771 gcc_unreachable ();
38773 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38774 fuse_ops[3] = XEXP (offset, 1);
38775 output_asm_insn (insn_template, fuse_ops);
/* 32-bit ELF: arbitrary constant via @l relocation.  */
38778 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38780 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38782 fuse_ops[2] = offset;
38783 output_asm_insn (insn_template, fuse_ops);
/* No template matched: this offset cannot be fused.  */
38787 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38792 /* Given an address, convert it into the addis and load offset parts. Addresses
38793 created during the peephole2 process look like:
38794 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38795 (unspec [(...)] UNSPEC_TOCREL))
   On success *P_HI receives the high (addis) part and *P_LO the low
   (offset) part.  */
38798 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38802 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38804 hi = XEXP (addr, 0);
38805 lo = XEXP (addr, 1);
/* Anything else is not a fusion-compatible address.  */
38808 gcc_unreachable ();
38814 /* Return a string to fuse an addis instruction with a gpr load to the same
38815 register that we loaded up the addis instruction. The address that is used
38816 is the logical address that was formed during peephole2:
38817 (lo_sum (high) (low-part))
38819 The code is complicated, so we call output_asm_insn directly, and just
   return "" to the caller.
   NOTE(review): this excerpt elides lines (braces, the mode switch that
   sets load_str per mode) present in the full rs6000.c.  */
38823 emit_fusion_gpr_load (rtx target, rtx mem)
38828 const char *load_str = NULL;
/* Strip a zero-extend wrapper; the inner MEM gives the load mode.  */
38831 if (GET_CODE (mem) == ZERO_EXTEND
38832 mem = XEXP (mem, 0);
38834 gcc_assert (REG_P (target) && MEM_P (mem));
38836 addr = XEXP (mem, 0);
38837 fusion_split_address (addr, &addis_value, &load_offset);
38839 /* Now emit the load instruction to the same register. */
38840 mode = GET_MODE (mem);
/* 64-bit loads (ld) require a 64-bit target — TODO confirm against the
   elided mode switch upstream.  */
38858 gcc_assert (TARGET_POWERPC64);
38863 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38866 /* Emit the addis instruction. */
38867 emit_fusion_addis (target, addis_value);
38869 /* Emit the D-form load instruction. */
38870 emit_fusion_load (target, target, load_offset, load_str);
38876 #ifdef RS6000_GLIBC_ATOMIC_FENV
38877 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
38878 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
38881 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.
   Builds the trees that save (HOLD), clear (CLEAR), and restore/merge
   (UPDATE) the FP environment around a C11 atomic compound assignment.
   With glibc atomics available, call the __atomic_fe* helpers; otherwise
   synthesise the sequences from mffs/mtfsf builtins.
   NOTE(review): this excerpt elides lines (braces, an early return for
   !TARGET_HARD_FLOAT, some argument lists) present in the full rs6000.c.  */
38884 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* No FP environment to manage without hardware floating point.  */
38886 if (!TARGET_HARD_FLOAT)
38888 #ifdef RS6000_GLIBC_ATOMIC_FENV
/* Lazily build extern decls for the glibc atomic fenv helpers.  */
38889 if (atomic_hold_decl == NULL_TREE)
38892 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38893 get_identifier ("__atomic_feholdexcept"),
38894 build_function_type_list (void_type_node,
38895 double_ptr_type_node,
38897 TREE_PUBLIC (atomic_hold_decl) = 1;
38898 DECL_EXTERNAL (atomic_hold_decl) = 1;
38901 if (atomic_clear_decl == NULL_TREE)
38904 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38905 get_identifier ("__atomic_feclearexcept"),
38906 build_function_type_list (void_type_node,
38908 TREE_PUBLIC (atomic_clear_decl) = 1;
38909 DECL_EXTERNAL (atomic_clear_decl) = 1;
38912 tree const_double = build_qualified_type (double_type_node,
38914 tree const_double_ptr = build_pointer_type (const_double);
38915 if (atomic_update_decl == NULL_TREE)
38918 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38919 get_identifier ("__atomic_feupdateenv"),
38920 build_function_type_list (void_type_node,
38923 TREE_PUBLIC (atomic_update_decl) = 1;
38924 DECL_EXTERNAL (atomic_update_decl) = 1;
/* The saved environment lives in an addressable temporary passed by
   pointer to the hold/update helpers.  */
38927 tree fenv_var = create_tmp_var_raw (double_type_node);
38928 TREE_ADDRESSABLE (fenv_var) = 1;
38929 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
38931 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
38932 *clear = build_call_expr (atomic_clear_decl, 0);
38933 *update = build_call_expr (atomic_update_decl, 1,
38934 fold_convert (const_double_ptr, fenv_addr));
/* Fallback path: open-code the sequences with mffs/mtfsf.  */
38939 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
38940 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
38941 tree call_mffs = build_call_expr (mffs, 0);
38943 /* Generates the equivalent of feholdexcept (&fenv_var)
38945 *fenv_var = __builtin_mffs ();
38947 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
38948 __builtin_mtfsf (0xff, fenv_hold); */
38950 /* Mask to clear everything except for the rounding modes and non-IEEE
38951 arithmetic flag. */
38952 const unsigned HOST_WIDE_INT hold_exception_mask =
38953 HOST_WIDE_INT_C (0xffffffff00000007);
38955 tree fenv_var = create_tmp_var_raw (double_type_node);
38957 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
/* FPSCR bit twiddling is done on the raw 64-bit image of the double.  */
38959 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
38960 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
38961 build_int_cst (uint64_type_node,
38962 hold_exception_mask));
38964 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
38967 tree hold_mtfsf = build_call_expr (mtfsf, 2,
38968 build_int_cst (unsigned_type_node, 0xff),
38971 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
38973 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
38975 double fenv_clear = __builtin_mffs ();
38976 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
38977 __builtin_mtfsf (0xff, fenv_clear); */
38979 /* Mask to clear everything except for the rounding modes and non-IEEE
38980 arithmetic flag. */
38981 const unsigned HOST_WIDE_INT clear_exception_mask =
38982 HOST_WIDE_INT_C (0xffffffff00000000);
38984 tree fenv_clear = create_tmp_var_raw (double_type_node);
38986 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
38988 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
38989 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
38991 build_int_cst (uint64_type_node,
38992 clear_exception_mask));
38994 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
38995 fenv_clear_llu_and);
38997 tree clear_mtfsf = build_call_expr (mtfsf, 2,
38998 build_int_cst (unsigned_type_node, 0xff),
39001 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
39003 /* Generates the equivalent of feupdateenv (&fenv_var)
39005 double old_fenv = __builtin_mffs ();
39006 double fenv_update;
39007 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
39008 (*(uint64_t*)fenv_var 0x1ff80fff);
39009 __builtin_mtfsf (0xff, fenv_update); */
39011 const unsigned HOST_WIDE_INT update_exception_mask =
39012 HOST_WIDE_INT_C (0xffffffff1fffff00);
39013 const unsigned HOST_WIDE_INT new_exception_mask =
39014 HOST_WIDE_INT_C (0x1ff80fff);
39016 tree old_fenv = create_tmp_var_raw (double_type_node);
39017 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
/* Merge: keep current status bits, restore saved control/enable bits.  */
39019 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
39020 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
39021 build_int_cst (uint64_type_node,
39022 update_exception_mask));
39024 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39025 build_int_cst (uint64_type_node,
39026 new_exception_mask));
39028 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
39029 old_llu_and, new_llu_and);
39031 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39034 tree update_mtfsf = build_call_expr (mtfsf, 2,
39035 build_int_cst (unsigned_type_node, 0xff),
39036 fenv_update_mtfsf);
39038 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
/* Emit RTL that narrows the four doubles in SRC1/SRC2 (both V2DF) into the
   single V4SF vector DST: pair the doubles with xxpermdi, convert each pair
   to single precision with xvcdpsp, then interleave the even words with
   vmrgew.  The operand order of the final vmrgew is swapped on little endian
   so DST has the same element order on both endiannesses.
   NOTE(review): this excerpt has interior lines elided — the return type,
   braces, the `else` keywords, and the GEN_INT selector operands of the
   big-endian xxpermdi calls are missing; restore from the full source.  */
39042 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
39044 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
/* Fresh V2DF registers to hold the permuted double-precision pairs.  */
39046 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39047 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39049 /* The destination of the vmrgew instruction layout is:
39050 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
39051 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39052 vmrgew instruction will be correct. */
39053 if (BYTES_BIG_ENDIAN)
39055 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
39057 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
/* Little-endian path: note the permute selectors (3 then 0) are the
   reverse of the big-endian pairing.  */
39062 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
39063 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
/* Convert each V2DF pair down to single precision (results land in
   V4SF temporaries).  */
39066 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39067 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39069 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
39070 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
/* Merge the even words of the two converted vectors; operands are
   swapped for little endian to keep DST's element order consistent.  */
39072 if (BYTES_BIG_ENDIAN)
39073 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39075 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
/* Emit RTL that converts the four 64-bit integers in SRC1/SRC2 (both V2DI)
   into the single V4SF vector DST.  SIGNED_CONVERT selects the signed
   (xvcvsxdsp) or unsigned (xvcvuxdsp) doubleword->single conversion; the
   pairing (xxpermdi) and final even-word merge (vmrgew) mirror
   rs6000_generate_float2_double_code above.
   NOTE(review): interior lines are elided in this excerpt — the return
   type, braces, and the `else` keywords are missing; restore from the full
   source.  */
39079 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
39081 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
/* Fresh V2DI registers to hold the permuted doubleword pairs.  */
39083 rtx_tmp0 = gen_reg_rtx (V2DImode);
39084 rtx_tmp1 = gen_reg_rtx (V2DImode);
39086 /* The destination of the vmrgew instruction layout is:
39087 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
39088 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39089 vmrgew instruction will be correct. */
39090 if (BYTES_BIG_ENDIAN)
39092 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
39093 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
/* Little-endian path: permute selectors are the reverse (3 then 0) of the
   big-endian pairing.  */
39097 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
39098 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
39101 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39102 rtx_tmp3 = gen_reg_rtx (V4SFmode);
/* Pick the signed or unsigned doubleword-to-single-precision convert.  */
39104 if (signed_convert)
39106 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
39107 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
39111 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
39112 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
/* Merge the even words; swap the operands for little endian.  */
39115 if (BYTES_BIG_ENDIAN)
39116 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39118 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
/* Emit RTL that converts the four doubles in SRC1/SRC2 (both V2DF) into the
   single V4SI vector DST of 32-bit integers.  SIGNED_CONVERT selects the
   signed (xvcvdpsxws) or unsigned (xvcvdpuxws) conversion.  Unlike the
   float2 helpers above, the xxpermdi selectors and the final vmrgew operand
   order are not conditional on endianness here.
   NOTE(review): interior lines are elided in this excerpt — the return
   type, the `rtx src2)` tail of the parameter list, braces, and the `else`
   keyword are missing; restore from the full source.  */
39122 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
39125 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
/* Fresh V2DF registers for the permuted double-precision pairs.  */
39127 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39128 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39130 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
39131 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
39133 rtx_tmp2 = gen_reg_rtx (V4SImode);
39134 rtx_tmp3 = gen_reg_rtx (V4SImode);
/* Pick the signed or unsigned double-to-word conversion.  */
39136 if (signed_convert)
39138 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
39139 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
39143 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
39144 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
/* Interleave the even words of the two converted vectors into DST.  */
39147 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
39150 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  The visible case allows
   the automatic reciprocal-square-root expansion only when optimizing for
   speed and the mode qualifies per RS6000_RECIP_AUTO_RSQRTE_P.
   NOTE(review): the function's return type and the switch-on-OP scaffolding
   around this return statement appear to be elided in this excerpt — only
   one case's body is visible; restore from the full source.  */
39153 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
39154 optimization_type opt_type)
39159 return (opt_type == OPTIMIZE_FOR_SPEED
39160 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
39167 /* Implement TARGET_CONSTANT_ALIGNMENT.  Boost string constants to at
   least word alignment (unless optimizing for size on a target without
   strict alignment), since word-aligned copies of strings are cheaper.
   NOTE(review): the opening brace and the fall-through `return align;`
   at the end appear to be elided in this excerpt.  */
39169 static HOST_WIDE_INT
39170 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
39172 if (TREE_CODE (exp) == STRING_CST
39173 && (STRICT_ALIGNMENT || !optimize_size))
39174 return MAX (align, BITS_PER_WORD);
39178 /* Implement TARGET_STARTING_FRAME_OFFSET.
   NOTE(review): braces and the branch bodies are elided in this excerpt;
   the visible code suggests a 0 offset when the frame grows downward and
   RS6000_STARTING_FRAME_OFFSET otherwise — confirm against the full
   source.  */
39180 static HOST_WIDE_INT
39181 rs6000_starting_frame_offset (void)
39183 if (FRAME_GROWS_DOWNWARD)
39185 return RS6000_STARTING_FRAME_OFFSET;
39189 /* Create an alias for a mangled name where we have changed the mangling (in
39190 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
39191 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
39193 #if TARGET_ELF && RS6000_WEAK
/* NOTE(review): the return type, braces, closing `}`s and the matching
   `#endif` are elided in this excerpt; restore from the full source.  */
39195 rs6000_globalize_decl_name (FILE * stream, tree decl)
39197 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
/* Emit the normal globalization directive for the current name first.  */
39199 targetm.asm_out.globalize_label (stream, name);
/* Only C++-mangled names ("_Z...") in ieee128-passing objects need the
   compatibility alias.  */
39201 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
39203 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
39204 const char *old_name;
/* Temporarily re-mangle DECL with the GCC 8.1 scheme to recover the old
   assembler name, then restore the saved name and flag.  */
39206 ieee128_mangling_gcc_8_1 = true;
39207 lang_hooks.set_decl_assembler_name (decl);
39208 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
39209 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
39210 ieee128_mangling_gcc_8_1 = false;
/* If the two manglings differ, emit a weak alias from the old name to
   the new one so binaries built against GCC 8.1 still link.  */
39212 if (strcmp (name, old_name) != 0)
39214 fprintf (stream, "\t.weak %s\n", old_name);
39215 fprintf (stream, "\t.set %s,%s\n", old_name, name);
39222 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
39223 function names from <foo>l to <foo>f128 if the default long double type is
39224 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
39225 include file switches the names on systems that support long double as IEEE
39226 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
39227 In the future, glibc will export names like __ieee128_sinf128 and we can
39228 switch to using those instead of using sinf128, which pollutes the user's
39231 This will switch the names for Fortran math functions as well (which doesn't
39232 use math.h). However, Fortran needs other changes to the compiler and
39233 library before you can switch the real*16 type at compile time.
39235 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
39236 only do this if the default is that long double is IBM extended double, and
39237 the user asked for IEEE 128-bit. */
/* NOTE(review): the return type, braces, and the final `return id;` are
   elided in this excerpt; restore from the full source.  */
39240 rs6000_mangle_decl_assembler_name (tree decl, tree id)
/* Only rename built-in functions, and only when the user overrode the
   IBM-extended default with IEEE 128-bit long double.  */
39242 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
39243 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
39245 size_t len = IDENTIFIER_LENGTH (id);
39246 const char *name = IDENTIFIER_POINTER (id);
/* Candidates are the <foo>l long-double entry points.  */
39248 if (name[len - 1] == 'l')
39250 bool uses_ieee128_p = false;
39251 tree type = TREE_TYPE (decl);
39252 machine_mode ret_mode = TYPE_MODE (type);
39254 /* See if the function returns a IEEE 128-bit floating point type or
39256 if (ret_mode == TFmode || ret_mode == TCmode)
39257 uses_ieee128_p = true;
39260 function_args_iterator args_iter;
39263 /* See if the function passes a IEEE 128-bit floating point type
39264 or complex type. */
39265 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
39267 machine_mode arg_mode = TYPE_MODE (arg);
39268 if (arg_mode == TFmode || arg_mode == TCmode)
39270 uses_ieee128_p = true;
39276 /* If we passed or returned an IEEE 128-bit floating point type,
39277 change the name. */
39278 if (uses_ieee128_p)
/* len - 1 chars of the old name + "f128" + NUL = len + 4 bytes.  */
39280 char *name2 = (char *) alloca (len + 4);
39281 memcpy (name2, name, len - 1);
39282 strcpy (name2 + len - 1, "f128");
39283 id = get_identifier (name2);
/* The rs6000 target hook vector; TARGET_INITIALIZER collects all the
   TARGET_* hook macros defined earlier in this file.  */
39292 struct gcc_target targetm = TARGET_INITIALIZER;
/* Generated garbage-collector roots/tables for this file (GTY machinery);
   conventionally included last.  */
39294 #include "gt-rs6000.h"