1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
35 #include "stringpool.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
46 #include "fold-const.h"
48 #include "stor-layout.h"
50 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-ssa.h"
64 #include "gimple-walk.h"
67 #include "tm-constrs.h"
68 #include "tree-vectorizer.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
76 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
79 #include "gstab.h" /* for N_SLINE */
81 #include "case-cfn-macros.h"
83 #include "tree-ssa-propagate.h"
85 #include "tree-ssanames.h"
87 /* This file should be included last. */
88 #include "target-def.h"
90 #ifndef TARGET_NO_PROTOTYPE
91 #define TARGET_NO_PROTOTYPE 0
94 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
95 systems will also set long double to be IEEE 128-bit. AIX and Darwin
96 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
97 those systems will not pick up this default. This needs to be after all
98 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are defined.  */
100 #ifndef TARGET_IEEEQUAD_DEFAULT
101 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
102 #define TARGET_IEEEQUAD_DEFAULT 1
104 #define TARGET_IEEEQUAD_DEFAULT 0
108 static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
110 /* Structure used to define the rs6000 stack */
111 typedef struct rs6000_stack {
112 int reload_completed; /* stack info won't change from here on */
113 int first_gp_reg_save; /* first callee saved GP register used */
114 int first_fp_reg_save; /* first callee saved FP register used */
115 int first_altivec_reg_save; /* first callee saved AltiVec register used */
116 int lr_save_p; /* true if the link reg needs to be saved */
117 int cr_save_p; /* true if the CR reg needs to be saved */
118 unsigned int vrsave_mask; /* mask of vec registers to save */
119 int push_p; /* true if we need to allocate stack space */
120 int calls_p; /* true if the function makes any calls */
121 int world_save_p; /* true if we're saving *everything*:
122 r13-r31, cr, f14-f31, vrsave, v20-v31 */
123 enum rs6000_abi abi; /* which ABI to use */
124 int gp_save_offset; /* offset to save GP regs from initial SP */
125 int fp_save_offset; /* offset to save FP regs from initial SP */
126 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
127 int lr_save_offset; /* offset to save LR from initial SP */
128 int cr_save_offset; /* offset to save CR from initial SP */
129 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
130 int varargs_save_offset; /* offset to save the varargs registers */
131 int ehrd_offset; /* offset to EH return data */
132 int ehcr_offset; /* offset to EH CR field data */
133 int reg_size; /* register size (4 or 8) */
134 HOST_WIDE_INT vars_size; /* variable save area size */
135 int parm_size; /* outgoing parameter size */
136 int save_size; /* save area size */
137 int fixed_size; /* fixed size of stack frame */
138 int gp_size; /* size of saved GP registers */
139 int fp_size; /* size of saved FP registers */
140 int altivec_size; /* size of saved AltiVec registers */
141 int cr_size; /* size to hold CR if not in fixed area */
142 int vrsave_size; /* size to hold VRSAVE */
143 int altivec_padding_size; /* size of altivec alignment padding */
144 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
148 /* A C structure for machine-specific, per-function data.
149 This is added to the cfun structure. */
150 typedef struct GTY(()) machine_function
152 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
153 int ra_needs_full_frame;
154 /* Flags if __builtin_return_address (0) was used. */
156 /* Cache lr_save_p after expansion of builtin_eh_return. */
158 /* Whether we need to save the TOC to the reserved stack location in the
159 function prologue. */
160 bool save_toc_in_prologue;
161 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
162 varargs save area. */
163 HOST_WIDE_INT varargs_save_offset;
164 /* Alternative internal arg pointer for -fsplit-stack. */
165 rtx split_stack_arg_pointer;
166 bool split_stack_argp_used;
167 /* Flag if r2 setup is needed with ELFv2 ABI. */
168 bool r2_setup_needed;
169 /* The number of components we use for separate shrink-wrapping. */
171 /* The components already handled by separate shrink-wrapping, which should
172 not be considered by the prologue and epilogue. */
173 bool gpr_is_wrapped_separately[32];
174 bool fpr_is_wrapped_separately[32];
175 bool lr_is_wrapped_separately;
176 bool toc_is_wrapped_separately;
179 /* Support targetm.vectorize.builtin_mask_for_load. */
180 static GTY(()) tree altivec_builtin_mask_for_load;
182 /* Set to nonzero once AIX common-mode calls have been defined. */
183 static GTY(()) int common_mode_defined;
185 /* Label number of label created for -mrelocatable, to call to so we can
186 get the address of the GOT section */
187 static int rs6000_pic_labelno;
190 /* Counter for labels which are to be placed in .fixup. */
191 int fixuplabelno = 0;
194 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
197 /* Specify the machine mode that pointers have. After generation of rtl, the
198 compiler makes no further distinction between pointers and any other objects
199 of this machine mode. */
200 scalar_int_mode rs6000_pmode;
203 /* Note whether IEEE 128-bit floating point was passed or returned, either as
204 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
205 floating point. We changed the default C++ mangling for these types and we
206 may want to generate a weak alias of the old mangling (U10__float128) to the
207 new mangling (u9__ieee128). */
208 static bool rs6000_passes_ieee128;
211 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
212 name used in current releases (i.e. u9__ieee128). */
213 static bool ieee128_mangling_gcc_8_1;
215 /* Width in bits of a pointer. */
216 unsigned rs6000_pointer_size;
218 #ifdef HAVE_AS_GNU_ATTRIBUTE
219 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
220 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
222 /* Flag whether floating point values have been passed/returned.
223 Note that this doesn't say whether fprs are used, since the
224 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
225 should be set for soft-float values passed in gprs and ieee128
226 values passed in vsx registers. */
227 static bool rs6000_passes_float;
228 static bool rs6000_passes_long_double;
229 /* Flag whether vector values have been passed/returned. */
230 static bool rs6000_passes_vector;
231 /* Flag whether small (<= 8 byte) structures have been returned. */
232 static bool rs6000_returns_struct;
235 /* Value is TRUE if register/mode pair is acceptable. */
236 static bool rs6000_hard_regno_mode_ok_p
237 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
239 /* Maximum number of registers needed for a given register class and mode. */
240 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
242 /* How many registers are needed for a given register and mode. */
243 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
245 /* Map register number to register class. */
246 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
248 static int dbg_cost_ctrl;
250 /* Built in types. */
251 tree rs6000_builtin_types[RS6000_BTI_MAX];
252 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
254 /* Flag to say the TOC is initialized */
255 int toc_initialized, need_toc_init;
256 char toc_label_name[10];
258 /* Cached value of rs6000_variable_issue. This is cached in
259 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
260 static short cached_can_issue_more;
262 static GTY(()) section *read_only_data_section;
263 static GTY(()) section *private_data_section;
264 static GTY(()) section *tls_data_section;
265 static GTY(()) section *tls_private_data_section;
266 static GTY(()) section *read_only_private_data_section;
267 static GTY(()) section *sdata2_section;
268 static GTY(()) section *toc_section;
270 struct builtin_description
272 const HOST_WIDE_INT mask;
273 const enum insn_code icode;
274 const char *const name;
275 const enum rs6000_builtins code;
278 /* Describe the vector unit used for modes. */
279 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
280 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
282 /* Register classes for various constraints that are based on the target
   switches.  */
284 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
286 /* Describe the alignment of a vector. */
287 int rs6000_vector_align[NUM_MACHINE_MODES];
289 /* Map selected modes to types for builtins. */
290 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
292 /* What modes to automatically generate reciprocal divide estimate (fre) and
293 reciprocal sqrt (frsqrte) for. */
294 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
296 /* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
298 enum rs6000_recip_mask {
299 RECIP_SF_DIV = 0x001, /* Use divide estimate */
300 RECIP_DF_DIV = 0x002,
301 RECIP_V4SF_DIV = 0x004,
302 RECIP_V2DF_DIV = 0x008,
304 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
305 RECIP_DF_RSQRT = 0x020,
306 RECIP_V4SF_RSQRT = 0x040,
307 RECIP_V2DF_RSQRT = 0x080,
309 /* Various combinations of flags for -mrecip=xxx.  */
311 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
312 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
313 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
315 RECIP_HIGH_PRECISION = RECIP_ALL,
317 /* On low precision machines like the power5, don't enable double precision
318 reciprocal square root estimate, since it isn't accurate enough. */
319 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
322 /* -mrecip options. */
325 const char *string; /* option name */
326 unsigned int mask; /* mask bits to set */
327 } recip_options[] = {
328 { "all", RECIP_ALL },
329 { "none", RECIP_NONE },
330 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
332 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
333 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
334 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
335 | RECIP_V2DF_RSQRT) },
336 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
337 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
340 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
346 { "power9", PPC_PLATFORM_POWER9 },
347 { "power8", PPC_PLATFORM_POWER8 },
348 { "power7", PPC_PLATFORM_POWER7 },
349 { "power6x", PPC_PLATFORM_POWER6X },
350 { "power6", PPC_PLATFORM_POWER6 },
351 { "power5+", PPC_PLATFORM_POWER5_PLUS },
352 { "power5", PPC_PLATFORM_POWER5 },
353 { "ppc970", PPC_PLATFORM_PPC970 },
354 { "power4", PPC_PLATFORM_POWER4 },
355 { "ppca2", PPC_PLATFORM_PPCA2 },
356 { "ppc476", PPC_PLATFORM_PPC476 },
357 { "ppc464", PPC_PLATFORM_PPC464 },
358 { "ppc440", PPC_PLATFORM_PPC440 },
359 { "ppc405", PPC_PLATFORM_PPC405 },
360 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
363 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
369 } cpu_supports_info[] = {
370 /* AT_HWCAP masks. */
371 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
372 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
373 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
374 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
375 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
376 { "booke", PPC_FEATURE_BOOKE, 0 },
377 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
378 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
379 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
380 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
381 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
382 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
383 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
384 { "notb", PPC_FEATURE_NO_TB, 0 },
385 { "pa6t", PPC_FEATURE_PA6T, 0 },
386 { "power4", PPC_FEATURE_POWER4, 0 },
387 { "power5", PPC_FEATURE_POWER5, 0 },
388 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
389 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
390 { "ppc32", PPC_FEATURE_32, 0 },
391 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
392 { "ppc64", PPC_FEATURE_64, 0 },
393 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
394 { "smt", PPC_FEATURE_SMT, 0 },
395 { "spe", PPC_FEATURE_HAS_SPE, 0 },
396 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
397 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
398 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
400 /* AT_HWCAP2 masks. */
401 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
402 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
403 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
404 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
405 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
406 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
407 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
408 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
409 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
410 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
411 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
412 { "darn", PPC_FEATURE2_DARN, 1 },
413 { "scv", PPC_FEATURE2_SCV, 1 }
416 /* On PowerPC, we have a limited number of target clones that we care about
417 which means we can use an array to hold the options, rather than having more
418 elaborate data structures to identify each possible variation. Order the
419 clones from the default to the highest ISA. */
421 CLONE_DEFAULT = 0, /* default clone. */
422 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
423 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
424 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
425 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
429 /* Map compiler ISA bits into HWCAP names. */
431 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
432 const char *name; /* name to use in __builtin_cpu_supports. */
435 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
436 { 0, "" }, /* Default options. */
437 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
438 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
439 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
440 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
444 /* Newer LIBCs explicitly export this symbol to declare that they provide
445 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
446 reference to this symbol whenever we expand a CPU builtin, so that
447 we never link against an old LIBC. */
448 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
450 /* True if we have expanded a CPU builtin. */
453 /* Pointer to function (in rs6000-c.c) that can define or undefine target
454 macros that have changed. Languages that don't support the preprocessor
455 don't link in rs6000-c.c, so we can't call it directly. */
456 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
458 /* Simplify register classes into simpler classifications.  We assume
459 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
460 check for standard register classes (gpr/floating/altivec/vsx) and
461 floating/vector classes (float/altivec/vsx). */
463 enum rs6000_reg_type {
474 /* Map register class to register type. */
475 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
477 /* First/last register type for the 'normal' register types (i.e. general
478 purpose, floating point, altivec, and VSX registers). */
479 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
481 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
484 /* Register classes we care about in secondary reload or go if legitimate
485 address. We only need to worry about GPR, FPR, and Altivec registers here,
486 along with an ANY field that is the OR of the 3 register classes.  */
488 enum rs6000_reload_reg_type {
489 RELOAD_REG_GPR, /* General purpose registers. */
490 RELOAD_REG_FPR, /* Traditional floating point regs. */
491 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
492 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
496 /* For setting up register classes, loop through the 3 register classes mapping
497 into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
499 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
500 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
502 /* Map reload register type to a register in the register class. */
503 struct reload_reg_map_type {
504 const char *name; /* Register class name. */
505 int reg; /* Register in the register class. */
508 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
509 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
510 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
511 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
512 { "Any", -1 }, /* RELOAD_REG_ANY. */
515 /* Mask bits for each register class, indexed per mode. Historically the
516 compiler has been more restrictive which types can do PRE_MODIFY instead of
517 PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
518 typedef unsigned char addr_mask_type;
520 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
521 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
522 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
523 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
524 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
525 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
526 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
527 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
529 /* Register type masks based on the type, of valid addressing modes. */
530 struct rs6000_reg_addr {
531 enum insn_code reload_load; /* INSN to reload for loading. */
532 enum insn_code reload_store; /* INSN to reload for storing. */
533 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
534 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
535 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
536 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
537 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
540 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
542 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
544 mode_supports_pre_incdec_p (machine_mode mode)
546 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
550 /* Helper function to say whether a mode supports PRE_MODIFY. */
552 mode_supports_pre_modify_p (machine_mode mode)
554 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
558 /* Return true if we have D-form addressing in altivec registers. */
560 mode_supports_vmx_dform (machine_mode mode)
562 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
565 /* Return true if we have D-form addressing in VSX registers. This addressing
566 is more limited than normal d-form addressing in that the offset must be
567 aligned on a 16-byte boundary. */
569 mode_supports_dq_form (machine_mode mode)
571 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
575 /* Given that there exists at least one variable that is set (produced)
576 by OUT_INSN and read (consumed) by IN_INSN, return true iff
577 IN_INSN represents one or more memory store operations and none of
578 the variables set by OUT_INSN is used by IN_INSN as the address of a
579 store operation. If either IN_INSN or OUT_INSN does not represent
580 a "single" RTL SET expression (as loosely defined by the
581 implementation of the single_set function) or a PARALLEL with only
582 SETs, CLOBBERs, and USEs inside, this function returns false.
584 This rs6000-specific version of store_data_bypass_p checks for
585 certain conditions that result in assertion failures (and internal
586 compiler errors) in the generic store_data_bypass_p function and
587 returns false rather than calling store_data_bypass_p if one of the
588 problematic conditions is detected. */
591 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
598 in_set = single_set (in_insn);
601 if (MEM_P (SET_DEST (in_set)))
603 out_set = single_set (out_insn);
606 out_pat = PATTERN (out_insn);
607 if (GET_CODE (out_pat) == PARALLEL)
609 for (i = 0; i < XVECLEN (out_pat, 0); i++)
611 out_exp = XVECEXP (out_pat, 0, i);
612 if ((GET_CODE (out_exp) == CLOBBER)
613 || (GET_CODE (out_exp) == USE))
615 else if (GET_CODE (out_exp) != SET)
624 in_pat = PATTERN (in_insn);
625 if (GET_CODE (in_pat) != PARALLEL)
628 for (i = 0; i < XVECLEN (in_pat, 0); i++)
630 in_exp = XVECEXP (in_pat, 0, i);
631 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
633 else if (GET_CODE (in_exp) != SET)
636 if (MEM_P (SET_DEST (in_exp)))
638 out_set = single_set (out_insn);
641 out_pat = PATTERN (out_insn);
642 if (GET_CODE (out_pat) != PARALLEL)
644 for (j = 0; j < XVECLEN (out_pat, 0); j++)
646 out_exp = XVECEXP (out_pat, 0, j);
647 if ((GET_CODE (out_exp) == CLOBBER)
648 || (GET_CODE (out_exp) == USE))
650 else if (GET_CODE (out_exp) != SET)
657 return store_data_bypass_p (out_insn, in_insn);
661 /* Processor costs (relative to an add) */
663 const struct processor_costs *rs6000_cost;
665 /* Instruction size costs on 32bit processors. */
667 struct processor_costs size32_cost = {
668 COSTS_N_INSNS (1), /* mulsi */
669 COSTS_N_INSNS (1), /* mulsi_const */
670 COSTS_N_INSNS (1), /* mulsi_const9 */
671 COSTS_N_INSNS (1), /* muldi */
672 COSTS_N_INSNS (1), /* divsi */
673 COSTS_N_INSNS (1), /* divdi */
674 COSTS_N_INSNS (1), /* fp */
675 COSTS_N_INSNS (1), /* dmul */
676 COSTS_N_INSNS (1), /* sdiv */
677 COSTS_N_INSNS (1), /* ddiv */
678 32, /* cache line size */
682 0, /* SF->DF convert */
685 /* Instruction size costs on 64bit processors. */
687 struct processor_costs size64_cost = {
688 COSTS_N_INSNS (1), /* mulsi */
689 COSTS_N_INSNS (1), /* mulsi_const */
690 COSTS_N_INSNS (1), /* mulsi_const9 */
691 COSTS_N_INSNS (1), /* muldi */
692 COSTS_N_INSNS (1), /* divsi */
693 COSTS_N_INSNS (1), /* divdi */
694 COSTS_N_INSNS (1), /* fp */
695 COSTS_N_INSNS (1), /* dmul */
696 COSTS_N_INSNS (1), /* sdiv */
697 COSTS_N_INSNS (1), /* ddiv */
698 128, /* cache line size */
702 0, /* SF->DF convert */
705 /* Instruction costs on RS64A processors. */
707 struct processor_costs rs64a_cost = {
708 COSTS_N_INSNS (20), /* mulsi */
709 COSTS_N_INSNS (12), /* mulsi_const */
710 COSTS_N_INSNS (8), /* mulsi_const9 */
711 COSTS_N_INSNS (34), /* muldi */
712 COSTS_N_INSNS (65), /* divsi */
713 COSTS_N_INSNS (67), /* divdi */
714 COSTS_N_INSNS (4), /* fp */
715 COSTS_N_INSNS (4), /* dmul */
716 COSTS_N_INSNS (31), /* sdiv */
717 COSTS_N_INSNS (31), /* ddiv */
718 128, /* cache line size */
722 0, /* SF->DF convert */
725 /* Instruction costs on MPCCORE processors. */
727 struct processor_costs mpccore_cost = {
728 COSTS_N_INSNS (2), /* mulsi */
729 COSTS_N_INSNS (2), /* mulsi_const */
730 COSTS_N_INSNS (2), /* mulsi_const9 */
731 COSTS_N_INSNS (2), /* muldi */
732 COSTS_N_INSNS (6), /* divsi */
733 COSTS_N_INSNS (6), /* divdi */
734 COSTS_N_INSNS (4), /* fp */
735 COSTS_N_INSNS (5), /* dmul */
736 COSTS_N_INSNS (10), /* sdiv */
737 COSTS_N_INSNS (17), /* ddiv */
738 32, /* cache line size */
742 0, /* SF->DF convert */
745 /* Instruction costs on PPC403 processors. */
747 struct processor_costs ppc403_cost = {
748 COSTS_N_INSNS (4), /* mulsi */
749 COSTS_N_INSNS (4), /* mulsi_const */
750 COSTS_N_INSNS (4), /* mulsi_const9 */
751 COSTS_N_INSNS (4), /* muldi */
752 COSTS_N_INSNS (33), /* divsi */
753 COSTS_N_INSNS (33), /* divdi */
754 COSTS_N_INSNS (11), /* fp */
755 COSTS_N_INSNS (11), /* dmul */
756 COSTS_N_INSNS (11), /* sdiv */
757 COSTS_N_INSNS (11), /* ddiv */
758 32, /* cache line size */
762 0, /* SF->DF convert */
765 /* Instruction costs on PPC405 processors. */
767 struct processor_costs ppc405_cost = {
768 COSTS_N_INSNS (5), /* mulsi */
769 COSTS_N_INSNS (4), /* mulsi_const */
770 COSTS_N_INSNS (3), /* mulsi_const9 */
771 COSTS_N_INSNS (5), /* muldi */
772 COSTS_N_INSNS (35), /* divsi */
773 COSTS_N_INSNS (35), /* divdi */
774 COSTS_N_INSNS (11), /* fp */
775 COSTS_N_INSNS (11), /* dmul */
776 COSTS_N_INSNS (11), /* sdiv */
777 COSTS_N_INSNS (11), /* ddiv */
778 32, /* cache line size */
782 0, /* SF->DF convert */
785 /* Instruction costs on PPC440 processors. */
787 struct processor_costs ppc440_cost = {
788 COSTS_N_INSNS (3), /* mulsi */
789 COSTS_N_INSNS (2), /* mulsi_const */
790 COSTS_N_INSNS (2), /* mulsi_const9 */
791 COSTS_N_INSNS (3), /* muldi */
792 COSTS_N_INSNS (34), /* divsi */
793 COSTS_N_INSNS (34), /* divdi */
794 COSTS_N_INSNS (5), /* fp */
795 COSTS_N_INSNS (5), /* dmul */
796 COSTS_N_INSNS (19), /* sdiv */
797 COSTS_N_INSNS (33), /* ddiv */
798 32, /* cache line size */
802 0, /* SF->DF convert */
805 /* Instruction costs on PPC476 processors. */
807 struct processor_costs ppc476_cost = {
808 COSTS_N_INSNS (4), /* mulsi */
809 COSTS_N_INSNS (4), /* mulsi_const */
810 COSTS_N_INSNS (4), /* mulsi_const9 */
811 COSTS_N_INSNS (4), /* muldi */
812 COSTS_N_INSNS (11), /* divsi */
813 COSTS_N_INSNS (11), /* divdi */
814 COSTS_N_INSNS (6), /* fp */
815 COSTS_N_INSNS (6), /* dmul */
816 COSTS_N_INSNS (19), /* sdiv */
817 COSTS_N_INSNS (33), /* ddiv */
818 32, /* l1 cache line size */
822 0, /* SF->DF convert */
825 /* Instruction costs on PPC601 processors. */
827 struct processor_costs ppc601_cost = {
828 COSTS_N_INSNS (5), /* mulsi */
829 COSTS_N_INSNS (5), /* mulsi_const */
830 COSTS_N_INSNS (5), /* mulsi_const9 */
831 COSTS_N_INSNS (5), /* muldi */
832 COSTS_N_INSNS (36), /* divsi */
833 COSTS_N_INSNS (36), /* divdi */
834 COSTS_N_INSNS (4), /* fp */
835 COSTS_N_INSNS (5), /* dmul */
836 COSTS_N_INSNS (17), /* sdiv */
837 COSTS_N_INSNS (31), /* ddiv */
838 32, /* cache line size */
842 0, /* SF->DF convert */
845 /* Instruction costs on PPC603 processors. */
847 struct processor_costs ppc603_cost = {
848 COSTS_N_INSNS (5), /* mulsi */
849 COSTS_N_INSNS (3), /* mulsi_const */
850 COSTS_N_INSNS (2), /* mulsi_const9 */
851 COSTS_N_INSNS (5), /* muldi */
852 COSTS_N_INSNS (37), /* divsi */
853 COSTS_N_INSNS (37), /* divdi */
854 COSTS_N_INSNS (3), /* fp */
855 COSTS_N_INSNS (4), /* dmul */
856 COSTS_N_INSNS (18), /* sdiv */
857 COSTS_N_INSNS (33), /* ddiv */
858 32, /* cache line size */
862 0, /* SF->DF convert */
865 /* Instruction costs on PPC604 processors. */
867 struct processor_costs ppc604_cost = {
868 COSTS_N_INSNS (4), /* mulsi */
869 COSTS_N_INSNS (4), /* mulsi_const */
870 COSTS_N_INSNS (4), /* mulsi_const9 */
871 COSTS_N_INSNS (4), /* muldi */
872 COSTS_N_INSNS (20), /* divsi */
873 COSTS_N_INSNS (20), /* divdi */
874 COSTS_N_INSNS (3), /* fp */
875 COSTS_N_INSNS (3), /* dmul */
876 COSTS_N_INSNS (18), /* sdiv */
877 COSTS_N_INSNS (32), /* ddiv */
878 32, /* cache line size */
882 0, /* SF->DF convert */
885 /* Instruction costs on PPC604e processors. */
887 struct processor_costs ppc604e_cost = {
888 COSTS_N_INSNS (2), /* mulsi */
889 COSTS_N_INSNS (2), /* mulsi_const */
890 COSTS_N_INSNS (2), /* mulsi_const9 */
891 COSTS_N_INSNS (2), /* muldi */
892 COSTS_N_INSNS (20), /* divsi */
893 COSTS_N_INSNS (20), /* divdi */
894 COSTS_N_INSNS (3), /* fp */
895 COSTS_N_INSNS (3), /* dmul */
896 COSTS_N_INSNS (18), /* sdiv */
897 COSTS_N_INSNS (32), /* ddiv */
898 32, /* cache line size */
902 0, /* SF->DF convert */
905 /* Instruction costs on PPC620 processors. */
907 struct processor_costs ppc620_cost = {
908 COSTS_N_INSNS (5), /* mulsi */
909 COSTS_N_INSNS (4), /* mulsi_const */
910 COSTS_N_INSNS (3), /* mulsi_const9 */
911 COSTS_N_INSNS (7), /* muldi */
912 COSTS_N_INSNS (21), /* divsi */
913 COSTS_N_INSNS (37), /* divdi */
914 COSTS_N_INSNS (3), /* fp */
915 COSTS_N_INSNS (3), /* dmul */
916 COSTS_N_INSNS (18), /* sdiv */
917 COSTS_N_INSNS (32), /* ddiv */
918 128, /* cache line size */
922 0, /* SF->DF convert */
925 /* Instruction costs on PPC630 processors. */
927 struct processor_costs ppc630_cost = {
928 COSTS_N_INSNS (5), /* mulsi */
929 COSTS_N_INSNS (4), /* mulsi_const */
930 COSTS_N_INSNS (3), /* mulsi_const9 */
931 COSTS_N_INSNS (7), /* muldi */
932 COSTS_N_INSNS (21), /* divsi */
933 COSTS_N_INSNS (37), /* divdi */
934 COSTS_N_INSNS (3), /* fp */
935 COSTS_N_INSNS (3), /* dmul */
936 COSTS_N_INSNS (17), /* sdiv */
937 COSTS_N_INSNS (21), /* ddiv */
938 128, /* cache line size */
942 0, /* SF->DF convert */
945 /* Instruction costs on Cell processor. */
946 /* COSTS_N_INSNS (1) ~ one add. */
948 struct processor_costs ppccell_cost = {
949 COSTS_N_INSNS (9/2)+2, /* mulsi */
950 COSTS_N_INSNS (6/2), /* mulsi_const */
951 COSTS_N_INSNS (6/2), /* mulsi_const9 */
952 COSTS_N_INSNS (15/2)+2, /* muldi */
953 COSTS_N_INSNS (38/2), /* divsi */
954 COSTS_N_INSNS (70/2), /* divdi */
955 COSTS_N_INSNS (10/2), /* fp */
956 COSTS_N_INSNS (10/2), /* dmul */
957 COSTS_N_INSNS (74/2), /* sdiv */
958 COSTS_N_INSNS (74/2), /* ddiv */
959 128, /* cache line size */
963 0, /* SF->DF convert */
966 /* Instruction costs on PPC750 and PPC7400 processors. */
968 struct processor_costs ppc750_cost = {
969 COSTS_N_INSNS (5), /* mulsi */
970 COSTS_N_INSNS (3), /* mulsi_const */
971 COSTS_N_INSNS (2), /* mulsi_const9 */
972 COSTS_N_INSNS (5), /* muldi */
973 COSTS_N_INSNS (17), /* divsi */
974 COSTS_N_INSNS (17), /* divdi */
975 COSTS_N_INSNS (3), /* fp */
976 COSTS_N_INSNS (3), /* dmul */
977 COSTS_N_INSNS (17), /* sdiv */
978 COSTS_N_INSNS (31), /* ddiv */
979 32, /* cache line size */
983 0, /* SF->DF convert */
986 /* Instruction costs on PPC7450 processors. */
988 struct processor_costs ppc7450_cost = {
989 COSTS_N_INSNS (4), /* mulsi */
990 COSTS_N_INSNS (3), /* mulsi_const */
991 COSTS_N_INSNS (3), /* mulsi_const9 */
992 COSTS_N_INSNS (4), /* muldi */
993 COSTS_N_INSNS (23), /* divsi */
994 COSTS_N_INSNS (23), /* divdi */
995 COSTS_N_INSNS (5), /* fp */
996 COSTS_N_INSNS (5), /* dmul */
997 COSTS_N_INSNS (21), /* sdiv */
998 COSTS_N_INSNS (35), /* ddiv */
999 32, /* cache line size */
1001 1024, /* l2 cache */
1003 0, /* SF->DF convert */
1006 /* Instruction costs on PPC8540 processors. */
1008 struct processor_costs ppc8540_cost = {
1009 COSTS_N_INSNS (4), /* mulsi */
1010 COSTS_N_INSNS (4), /* mulsi_const */
1011 COSTS_N_INSNS (4), /* mulsi_const9 */
1012 COSTS_N_INSNS (4), /* muldi */
1013 COSTS_N_INSNS (19), /* divsi */
1014 COSTS_N_INSNS (19), /* divdi */
1015 COSTS_N_INSNS (4), /* fp */
1016 COSTS_N_INSNS (4), /* dmul */
1017 COSTS_N_INSNS (29), /* sdiv */
1018 COSTS_N_INSNS (29), /* ddiv */
1019 32, /* cache line size */
1022 1, /* prefetch streams /*/
1023 0, /* SF->DF convert */
/* Instruction costs on E300C2 and E300C3 cores.  */
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on PPCE500MC processors.  */
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on PPCE500MC64 processors.  */
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on PPCE5500 processors.  */
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on PPCE6500 processors.  */
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on AppliedMicro Titan processors.  */
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on POWER4 and POWER5 processors.  */
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on POWER6 processors.  */
/* NOTE(review): the l1 cache field appears to be missing from this
   excerpt.  */
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
/* Instruction costs on POWER7 processors.  */
/* NOTE(review): the l1/l2 cache fields appear to be missing from this
   excerpt.  */
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
/* Instruction costs on POWER8 processors.  */
/* NOTE(review): the l1/l2 cache fields appear to be missing from this
   excerpt.  */
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
/* Instruction costs on POWER9 processors.  */
/* NOTE(review): the l1/l2 cache fields appear to be missing from this
   excerpt.  */
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
/* Instruction costs on POWER A2 processors.  */
/* NOTE(review): the cache-line-size and l1 cache fields appear to be
   missing from this excerpt.  */
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

/* Each RS6000_BUILTIN_* macro turns one entry of rs6000-builtin.def into
   an initializer row for the rs6000_builtin_info table below; all arities
   expand identically here since only NAME/ICODE/MASK/ATTR are recorded.  */
#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

/* NOTE(review): the `name' member and closing brace of this struct appear
   to be missing from this excerpt.  */
struct rs6000_builtin_info_type {
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
#include "rs6000-builtin.def"

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

/* Forward declarations of local helpers.  The rs6000_debug_* variants are
   wrappers selected at run time (several are installed through the *_ptr
   function pointers below).  NOTE(review): a number of these prototypes
   are cut mid-parameter-list in this excerpt.  */
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
static void macho_branch_islands (void);
static tree get_prev_label (tree);
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
static bool rs6000_debug_secondary_memory_needed (machine_mode,
static bool rs6000_debug_can_change_mode_class (machine_mode,
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

/* Function pointers through which the normal or -mdebug variants of the
   address/reload helpers are called.  */
rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
static void rs6000_print_builtin_options (FILE *, int, const char *,
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  secondary_reload_info *,
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */
/* NOTE(review): the opening/closing braces and some members of these
   structs appear to be missing from this excerpt.  */

struct GTY((for_user)) toc_hash_struct
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  machine_mode key_mode;

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
  /* First 32 entries are presumably the GPRs and the next 32 the FPRs
     (cf. the "%r"/"%f" prefixes in alt_reg_names below) -- confirm.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr","ap",
  /* Condition-register fields (cf. "%cr0".."%cr7" in alt_reg_names).  */
  "0", "1", "2", "3", "4", "5", "6", "7",

  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  /* Soft frame pointer.  */

  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
#ifdef TARGET_REGNAMES
/* Alternate (assembler-style) register names, used when -mregnames is in
   effect via TARGET_REGNAMES.  */
static const char alt_reg_names[][8] =
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",

  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",

  /* Soft frame pointer.  */

  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  /* "shortcall" is deliberately handled by the same routine as
     "longcall".  */
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
/* Initialize the GCC target structure.  Each pair below overrides one
   TARGET_* hook in target-def.h with the rs6000 implementation.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF

/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* This hook deals with fixups for relocatable code and DI-mode
   objects.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

/* Separate shrink-wrapping of prologue/epilogue components.  */
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

/* Instruction-scheduling hooks.  */
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

/* Autovectorizer hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
/* Builtin-function hooks.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

/* Cost-model hooks.  */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

/* Calling-convention and argument-passing hooks.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

/* Reload / register-class hooks.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

/* Per-function target-option ("target" attribute / pragma) hooks.  */
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

/* Function multi-versioning hooks.  */
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset

#if TARGET_ELF && RS6000_WEAK
#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name

#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1992 /* Processor table.  */
/* Fields below belong to struct rs6000_ptt (its header line is outside this
   view): one row per supported -mcpu=/-mtune= processor name.  */
1995   const char *const name;		/* Canonical processor name.  */
1996   const enum processor_type processor; /* Processor type enum value.  */
1997   const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
/* The table rows are generated by expanding each RS6000_CPU entry from
   rs6000-cpus.def via the macro below.  */
2000 static struct rs6000_ptt const processor_target_table[] =
2002 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2003 #include "rs6000-cpus.def"
2007 /* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is not found.  Performs a linear scan over processor_target_table,
   matching NAME exactly (case-sensitive strcmp).  */
2011 rs6000_cpu_name_lookup (const char *name)
2017       for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2018 	if (! strcmp (name, processor_target_table[i].name))
2026 /* Return number of consecutive hard regs needed starting at reg REGNO
2027 to hold something of mode MODE.
2028 This is ordinarily the length in words of a value of mode MODE
2029 but can be less for certain modes in special long registers.
2031 POWER and PowerPC GPRs hold 32 bits worth;
2032 PowerPC64 GPRs and FPRs hold 64 bits worth.  */
2035 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2037 unsigned HOST_WIDE_INT reg_size;
2039 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2040 128-bit floating point that can go in vector registers, which has VSX
2041 memory addressing.  */
2042 if (FP_REGNO_P (regno))
2043 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2044 ? UNITS_PER_VSX_WORD
2045 : UNITS_PER_FP_WORD);
2047 else if (ALTIVEC_REGNO_P (regno))
2048 reg_size = UNITS_PER_ALTIVEC_WORD;
/* All remaining register files (GPRs and special registers) use the
   generic word size.  */
2051 reg_size = UNITS_PER_WORD;
/* Ceiling division: number of REG_SIZE-byte registers needed to cover
   GET_MODE_SIZE (mode) bytes.  */
2053 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2056 /* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  This is the uncached worker; results are normally read from the
   precomputed rs6000_hard_regno_mode_ok_p table instead.  */
2059 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
/* Last register of the group is computed from the ORIGINAL mode, before
   any complex mode is reduced to its inner (component) mode below.  */
2061 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2063 if (COMPLEX_MODE_P (mode))
2064 mode = GET_MODE_INNER (mode);
2066 /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
2067 register combinations, and use PTImode where we need to deal with quad
2068 word memory operations.  Don't allow quad words in the argument or frame
2069 pointer registers, just registers 0..31.  */
2070 if (mode == PTImode)
2071 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2072 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2073 && ((regno & 1) == 0))
2075 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2076 implementations.  Don't allow an item to be split between a FP register
2077 and an Altivec register.  Allow TImode in all VSX registers if the user
2079 if (TARGET_VSX && VSX_REGNO_P (regno)
2080 && (VECTOR_MEM_VSX_P (mode)
2081 || FLOAT128_VECTOR_P (mode)
2082 || reg_addr[mode].scalar_in_vmx_p
2084 || (TARGET_VADDUQM && mode == V1TImode)))
/* The whole value must stay within a single register file: all FPRs or
   all Altivec registers, never straddling the two halves of the VSX set.  */
2086 if (FP_REGNO_P (regno))
2087 return FP_REGNO_P (last_regno);
2089 if (ALTIVEC_REGNO_P (regno))
2091 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2094 return ALTIVEC_REGNO_P (last_regno);
2098 /* The GPRs can hold any mode, but values bigger than one register
2099 cannot go past R31.  */
2100 if (INT_REGNO_P (regno))
2101 return INT_REGNO_P (last_regno);
2103 /* The float registers (except for VSX vector modes) can only hold floating
2104 modes and DImode.  */
2105 if (FP_REGNO_P (regno))
2107 if (FLOAT128_VECTOR_P (mode))
/* TDmode (decimal float, 128-bit) additionally requires an even-numbered
   starting FPR.  */
2110 if (SCALAR_FLOAT_MODE_P (mode)
2111 && (mode != TDmode || (regno % 2) == 0)
2112 && FP_REGNO_P (last_regno))
2115 if (GET_MODE_CLASS (mode) == MODE_INT)
2117 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
/* Power8/Power9 extend the set of small integer modes allowed in FPRs.  */
2120 if (TARGET_P8_VECTOR && (mode == SImode))
2123 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2130 /* The CR register can only hold CC modes.  */
2131 if (CR_REGNO_P (regno))
2132 return GET_MODE_CLASS (mode) == MODE_CC;
2134 if (CA_REGNO_P (regno))
2135 return mode == Pmode || mode == SImode;
2137 /* AltiVec only in AltiVec registers.  */
2138 if (ALTIVEC_REGNO_P (regno))
2139 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2140 || mode == V1TImode);
2142 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2143 and it must be able to fit within the register set.  */
2145 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2148 /* Implement TARGET_HARD_REGNO_NREGS.  Simply reads the value precomputed
   by rs6000_hard_regno_nregs_internal into the global table.  */
2151 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2153   return rs6000_hard_regno_nregs[mode][regno];
2156 /* Implement TARGET_HARD_REGNO_MODE_OK.  Cached wrapper around
   rs6000_hard_regno_mode_ok_uncached; reads the precomputed table.  */
2159 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2161   return rs6000_hard_regno_mode_ok_p[mode][regno];
2164 /* Implement TARGET_MODES_TIEABLE_P.
2166 PTImode cannot tie with other modes because PTImode is restricted to even
2167 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2170 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2171 128-bit floating point on VSX systems ties with other vectors.

   The checks are ordered pairs: each category first tests MODE1, returning
   whether MODE2 is in the same category, then tests MODE2 symmetrically.  */
2174 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2176 if (mode1 == PTImode)
2177 return mode2 == PTImode;
2178 if (mode2 == PTImode)
2181 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2182 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2183 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2186 if (SCALAR_FLOAT_MODE_P (mode1))
2187 return SCALAR_FLOAT_MODE_P (mode2);
2188 if (SCALAR_FLOAT_MODE_P (mode2))
2191 if (GET_MODE_CLASS (mode1) == MODE_CC)
2192 return GET_MODE_CLASS (mode2) == MODE_CC;
2193 if (GET_MODE_CLASS (mode2) == MODE_CC)
2199 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  A register is partially
   clobbered across a call when only the low part of a multi-part value
   survives: wide values in GPRs (more than 4 bytes) and wide non-128-bit
   values in FPRs (more than 8 bytes) — the enclosing ABI/target conditions
   on the elided lines gate both tests.  */
2202 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2203 				       unsigned int regno, machine_mode mode)
2207       && GET_MODE_SIZE (mode) > 4
2208       && INT_REGNO_P (regno))
2212       && FP_REGNO_P (regno)
2213       && GET_MODE_SIZE (mode) > 8
2214       && !FLOAT128_2REG_P (mode))
2220 /* Print interesting facts about registers.  Emits one stderr line per
   register in [FIRST_REGNO, LAST_REGNO], listing the modes it can hold
   (with register counts), call-used/fixed status, its register class, and
   its number.  Debug-only output for -mdebug=reg.  */
2222 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2226   for (r = first_regno; r <= last_regno; ++r)
2228       const char *comma = "";
/* A single-register range prints just the name; a span prints the name
   plus the offset from the first register.  */
2231       if (first_regno == last_regno)
2232 	fprintf (stderr, "%s:\t", reg_name);
2234 	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2237       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2238 	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
/* Line wrapping: the elided length check decides when to break.  */
2242 		fprintf (stderr, ",\n\t");
2247 	    if (rs6000_hard_regno_nregs[m][r] > 1)
2248 	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2249 			      rs6000_hard_regno_nregs[m][r]);
2251 	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2256       if (call_used_regs[r])
2260 	      fprintf (stderr, ",\n\t");
2265 	  len += fprintf (stderr, "%s%s", comma, "call-used");
2273 	      fprintf (stderr, ",\n\t");
2278 	  len += fprintf (stderr, "%s%s", comma, "fixed");
2284 	  fprintf (stderr, ",\n\t");
2288       len += fprintf (stderr, "%sreg-class = %s", comma,
2289 		      reg_class_names[(int)rs6000_regno_regclass[r]]);
2294 	  fprintf (stderr, ",\n\t");
2298       fprintf (stderr, "%sregno = %d\n", comma, r);
/* Map an rs6000_vector enumerator to a human-readable name for the
   -mdebug=reg dump.  */
2303 rs6000_debug_vector_unit (enum rs6000_vector v)
2309     case VECTOR_NONE:	   ret = "none";      break;
2310     case VECTOR_ALTIVEC:   ret = "altivec";   break;
2311     case VECTOR_VSX:	   ret = "vsx";       break;
2312     case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2313     default:		   ret = "unknown";   break;
2319 /* Inner function printing just the address mask for a particular reload
   register class.  Builds a fixed-width flag string; when KEEP_SPACES is
   true, absent flags are padded with blanks so columns line up across
   register classes.  */
2321 DEBUG_FUNCTION char *
2322 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2327   if ((mask & RELOAD_REG_VALID) != 0)
2329   else if (keep_spaces)
2332   if ((mask & RELOAD_REG_MULTIPLE) != 0)
2334   else if (keep_spaces)
2337   if ((mask & RELOAD_REG_INDEXED) != 0)
2339   else if (keep_spaces)
/* QUAD_OFFSET (restricted DQ-form offset) takes priority over the plain
   OFFSET flag in the printout.  */
2342   if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2344   else if ((mask & RELOAD_REG_OFFSET) != 0)
2346   else if (keep_spaces)
2349   if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2351   else if (keep_spaces)
2354   if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2356   else if (keep_spaces)
2359   if ((mask & RELOAD_REG_AND_M16) != 0)
2361   else if (keep_spaces)
2369 /* Print the address masks in a human readable fashion.  Emits one stderr
   line per machine mode M: the per-reload-class address masks, the
   reload handler availability, whether scalars may live in the upper
   (Altivec) VSX registers, and the vector arithmetic/memory units.  */
2371 rs6000_debug_print_mode (ssize_t m)
2376   fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2377   for (rc = 0; rc < N_RELOAD_REG; rc++)
2378     fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2379 	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2381   if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2382       || (reg_addr[m].reload_load != CODE_FOR_nothing))
/* 's' = store reload handler present, 'l' = load handler, '*' = absent.  */
2384       fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2385 	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2386 	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
/* When a field is not printed, grow the padding so later fields still
   align with other modes' rows.  */
2390     spaces += sizeof ("  Reload=sl") - 1;
2392   if (reg_addr[m].scalar_in_vmx_p)
2394       fprintf (stderr, "%*s Upper=y", spaces, "");
2398     spaces += sizeof ("  Upper=y") - 1;
2400   if (rs6000_vector_unit[m] != VECTOR_NONE
2401       || rs6000_vector_mem[m] != VECTOR_NONE)
2403       fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2405 	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2406 	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2409   fputs ("\n", stderr);
/* printf format strings shared by the -mdebug=reg dump routines: a
   32-column left-aligned label followed by a decimal, a HOST_WIDE_INT in
   hex, or a string value.  */
2412 #define DEBUG_FMT_ID "%-32s= "
2413 #define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
2414 #define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2415 #define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
2417 /* Print various interesting information with -mdebug=reg.  Dumps, in
   order: hard/virtual register facts, constraint-letter register classes,
   per-mode address masks, mode tieability, reciprocal-estimate settings,
   -mcpu/-mtune flags, ISA flag sets, and assorted scheduling/ABI/codegen
   options.  All output goes to stderr; debug-only, no side effects on
   compilation state.  */
2419 rs6000_debug_reg_global (void)
2421   static const char *const tf[2] = { "false", "true" };
2422   const char *nl = (const char *)0;
2425   char costly_num[20];
2427   char flags_buffer[40];
2428   const char *costly_str;
2429   const char *nop_str;
2430   const char *trace_str;
2431   const char *abi_str;
2432   const char *cmodel_str;
2433   struct cl_target_option cl_opts;
2435   /* Modes we want tieable information on.  */
2436   static const machine_mode print_tieable_modes[] = {
2470   /* Virtual regs we are interested in.  */
2471   const static struct {
2472     int regno;			/* register number.  */
2473     const char *name;		/* register name.  */
2474   } virtual_regs[] = {
2475     { STACK_POINTER_REGNUM,			"stack pointer:" },
2476     { TOC_REGNUM,				"toc:          " },
2477     { STATIC_CHAIN_REGNUM,			"static chain: " },
2478     { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
2479     { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
2480     { ARG_POINTER_REGNUM,			"arg pointer:  " },
2481     { FRAME_POINTER_REGNUM,			"frame pointer:" },
2482     { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
2483     { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
2484     { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
2485     { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
2486     { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
2487     { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
2488     { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
/* NOTE(review): "boundry" below is a typo baked into the emitted debug
   string; left unchanged because it is runtime output.  */
2489     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2490     { LAST_VIRTUAL_REGISTER,			"last virtual: " },
2493   fputs ("\nHard register information:\n", stderr);
2494   rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2495   rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2496   rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2499   rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2500   rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2501   rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2502   rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2503   rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2504   rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2506   fputs ("\nVirtual/stack/frame registers:\n", stderr);
2507   for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2508     fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
/* One register class per compile-time-dependent constraint letter; the
   classes themselves are filled in by rs6000_init_hard_regno_mode_ok.  */
2512 	   "d  reg_class = %s\n"
2513 	   "f  reg_class = %s\n"
2514 	   "v  reg_class = %s\n"
2515 	   "wa reg_class = %s\n"
2516 	   "wb reg_class = %s\n"
2517 	   "wd reg_class = %s\n"
2518 	   "we reg_class = %s\n"
2519 	   "wf reg_class = %s\n"
2520 	   "wg reg_class = %s\n"
2521 	   "wh reg_class = %s\n"
2522 	   "wi reg_class = %s\n"
2523 	   "wj reg_class = %s\n"
2524 	   "wk reg_class = %s\n"
2525 	   "wl reg_class = %s\n"
2526 	   "wm reg_class = %s\n"
2527 	   "wo reg_class = %s\n"
2528 	   "wp reg_class = %s\n"
2529 	   "wq reg_class = %s\n"
2530 	   "wr reg_class = %s\n"
2531 	   "ws reg_class = %s\n"
2532 	   "wt reg_class = %s\n"
2533 	   "wu reg_class = %s\n"
2534 	   "wv reg_class = %s\n"
2535 	   "ww reg_class = %s\n"
2536 	   "wx reg_class = %s\n"
2537 	   "wy reg_class = %s\n"
2538 	   "wz reg_class = %s\n"
2539 	   "wA reg_class = %s\n"
2540 	   "wH reg_class = %s\n"
2541 	   "wI reg_class = %s\n"
2542 	   "wJ reg_class = %s\n"
2543 	   "wK reg_class = %s\n"
2545 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2546 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2547 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2548 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2549 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2550 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2551 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2552 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2553 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2554 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2555 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2556 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2557 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2558 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2559 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2560 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2561 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2562 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2563 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2564 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2565 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2566 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2567 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2568 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2569 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2570 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2571 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2572 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2573 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2574 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2575 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2576 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2579   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2580     rs6000_debug_print_mode (m);
2582   fputs ("\n", stderr);
/* For every ordered pair of distinct modes in print_tieable_modes, report
   the ones rs6000_modes_tieable_p accepts.  */
2584   for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2586       machine_mode mode1 = print_tieable_modes[m1];
2587       bool first_time = true;
2589       nl = (const char *)0;
2590       for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2592 	  machine_mode mode2 = print_tieable_modes[m2];
2593 	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2597 		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2602 	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2607 	fputs ("\n", stderr);
2613   if (rs6000_recip_control)
2615       fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2617       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2618 	if (rs6000_recip_bits[m])
2621 		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2623 		     (RS6000_RECIP_AUTO_RE_P (m)
2625 		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2626 		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
2628 		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2631       fputs ("\n", stderr);
/* -mcpu / -mtune: negative index means the option was not given.  */
2634   if (rs6000_cpu_index >= 0)
2636       const char *name = processor_target_table[rs6000_cpu_index].name;
2638 	= processor_target_table[rs6000_cpu_index].target_enable;
2640       sprintf (flags_buffer, "-mcpu=%s flags", name);
2641       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2644     fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2646   if (rs6000_tune_index >= 0)
2648       const char *name = processor_target_table[rs6000_tune_index].name;
2650 	= processor_target_table[rs6000_tune_index].target_enable;
2652       sprintf (flags_buffer, "-mtune=%s flags", name);
2653       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2656     fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2658   cl_target_option_save (&cl_opts, &global_options);
2659   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2662   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2663 			    rs6000_isa_flags_explicit);
2665   rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2666 				rs6000_builtin_mask);
2668   rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2670   fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2671 	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2673   switch (rs6000_sched_costly_dep)
2675     case max_dep_latency:
2676       costly_str = "max_dep_latency";
2680       costly_str = "no_dep_costly";
2683     case all_deps_costly:
2684       costly_str = "all_deps_costly";
2687     case true_store_to_load_dep_costly:
2688       costly_str = "true_store_to_load_dep_costly";
2691     case store_to_load_dep_costly:
2692       costly_str = "store_to_load_dep_costly";
/* Unrecognized enum value: print the raw number instead of a name.  */
2696       costly_str = costly_num;
2697       sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2701   fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2703   switch (rs6000_sched_insert_nops)
2705     case sched_finish_regroup_exact:
2706       nop_str = "sched_finish_regroup_exact";
2709     case sched_finish_pad_groups:
2710       nop_str = "sched_finish_pad_groups";
2713     case sched_finish_none:
2714       nop_str = "sched_finish_none";
2719       sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2723   fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2725   switch (rs6000_sdata)
2732       fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2736       fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2740       fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2745   switch (rs6000_traceback)
2747     case traceback_default:	trace_str = "default";	break;
2748     case traceback_none:	trace_str = "none";	break;
2749     case traceback_part:	trace_str = "part";	break;
2750     case traceback_full:	trace_str = "full";	break;
2751     default:			trace_str = "unknown";	break;
2754   fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2756   switch (rs6000_current_cmodel)
2758     case CMODEL_SMALL:	cmodel_str = "small";	break;
2759     case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
2760     case CMODEL_LARGE:	cmodel_str = "large";	break;
2761     default:		cmodel_str = "unknown";	break;
2764   fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2766   switch (rs6000_current_abi)
2768     case ABI_NONE:	abi_str = "none";	break;
2769     case ABI_AIX:	abi_str = "aix";	break;
2770     case ABI_ELFv2:	abi_str = "ELFv2";	break;
2771     case ABI_V4:	abi_str = "V4";		break;
2772     case ABI_DARWIN:	abi_str = "darwin";	break;
2773     default:		abi_str = "unknown";	break;
2776   fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2778   if (rs6000_altivec_abi)
2779     fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2781   if (rs6000_darwin64_abi)
2782     fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2784   fprintf (stderr, DEBUG_FMT_S, "soft_float",
2785 	   (TARGET_SOFT_FLOAT ? "true" : "false"));
2787   if (TARGET_LINK_STACK)
2788     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2790   if (TARGET_P8_FUSION)
2794       strcpy (options, "power8");
2795       if (TARGET_P8_FUSION_SIGN)
2796 	strcat (options, ", sign");
2798       fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2801   fprintf (stderr, DEBUG_FMT_S, "plt-format",
2802 	   TARGET_SECURE_PLT ? "secure" : "bss");
2803   fprintf (stderr, DEBUG_FMT_S, "struct-return",
2804 	   aix_struct_return ? "aix" : "sysv");
2805   fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2806   fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2807   fprintf (stderr, DEBUG_FMT_S, "align_branch",
2808 	   tf[!!rs6000_align_branch_targets]);
2809   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2810   fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2811 	   rs6000_long_double_type_size);
2812   if (rs6000_long_double_type_size > 64)
2814       fprintf (stderr, DEBUG_FMT_S, "long double type",
2815 	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
2816       fprintf (stderr, DEBUG_FMT_S, "default long double type",
2817 	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2819   fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2820 	   (int)rs6000_sched_restricted_insns_priority);
2821   fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2823   fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2824 	   (int)RS6000_BUILTIN_COUNT);
2826     fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2827 	     (int)TARGET_FLOAT128_ENABLE_TYPE);
2830       fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2831 	       (int)VECTOR_ELEMENT_SCALAR_64BIT);
2833       if (TARGET_DIRECT_MOVE_128)
2834 	fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2835 		 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2839 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2840 legitimate address support to figure out the appropriate addressing to
   use.  For every machine mode, computes a per-reload-register-class
   bitmask of legal addressing forms (indexed, offset, pre-inc/dec,
   pre-modify, AND -16, etc.) plus a union mask for RELOAD_REG_ANY.  */
2844 rs6000_setup_reg_addr_masks (void)
2846   ssize_t rc, reg, m, nregs;
2847   addr_mask_type any_addr_mask, addr_mask;
2849   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2851       machine_mode m2 = (machine_mode) m;
2852       bool complex_p = false;
2853       bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
/* For complex modes, decisions below are based on the component mode.  */
2856       if (COMPLEX_MODE_P (m2))
2859 	  m2 = GET_MODE_INNER (m2);
2862       msize = GET_MODE_SIZE (m2);
2864       /* SDmode is special in that we want to access it only via REG+REG
2865 	 addressing on power7 and above, since we want to use the LFIWZX and
2866 	 STFIWZX instructions to load it.  */
2867       bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2870       for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2873 	  reg = reload_reg_map[rc].reg;
2875 	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
2876 	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2878 	      bool small_int_vsx_p = (small_int_p
2879 				      && (rc == RELOAD_REG_FPR
2880 					  || rc == RELOAD_REG_VMX));
2882 	      nregs = rs6000_hard_regno_nregs[m][reg];
2883 	      addr_mask |= RELOAD_REG_VALID;
2885 	      /* Indicate if the mode takes more than 1 physical register.  If
2886 		 it takes a single register, indicate it can do REG+REG
2887 		 addressing.  Small integers in VSX registers can only do
2888 		 REG+REG addressing.  */
2889 	      if (small_int_vsx_p)
2890 		addr_mask |= RELOAD_REG_INDEXED;
2891 	      else if (nregs > 1 || m == BLKmode || complex_p)
2892 		addr_mask |= RELOAD_REG_MULTIPLE;
2894 		addr_mask |= RELOAD_REG_INDEXED;
2896 	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2897 		 addressing.  If we allow scalars into Altivec registers,
2898 		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2900 		 For VSX systems, we don't allow update addressing for
2901 		 DFmode/SFmode if those registers can go in both the
2902 		 traditional floating point registers and Altivec registers.
2903 		 The load/store instructions for the Altivec registers do not
2904 		 have update forms.  If we allowed update addressing, it seems
2905 		 to break IV-OPT code using floating point if the index type is
2906 		 int instead of long (PR target/81550 and target/84042).  */
2909 		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2911 		  && !VECTOR_MODE_P (m2)
2912 		  && !FLOAT128_VECTOR_P (m2)
2914 		  && (m != E_DFmode || !TARGET_VSX)
2915 		  && (m != E_SFmode || !TARGET_P8_VECTOR)
2916 		  && !small_int_vsx_p)
2918 		  addr_mask |= RELOAD_REG_PRE_INCDEC;
2920 		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2921 		     we don't allow PRE_MODIFY for some multi-register
2926 		      addr_mask |= RELOAD_REG_PRE_MODIFY;
2930 		      if (TARGET_POWERPC64)
2931 			addr_mask |= RELOAD_REG_PRE_MODIFY;
2936 		      if (TARGET_HARD_FLOAT)
2937 			addr_mask |= RELOAD_REG_PRE_MODIFY;
2943 	  /* GPR and FPR registers can do REG+OFFSET addressing, except
2944 	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
2945 	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
2946 	  if ((addr_mask != 0) && !indexed_only_p
2948 	      && (rc == RELOAD_REG_GPR
2949 		  || ((msize == 8 || m2 == SFmode)
2950 		      && (rc == RELOAD_REG_FPR
2951 			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2952 	    addr_mask |= RELOAD_REG_OFFSET;
2954 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2955 	     instructions are enabled.  The offset for 128-bit VSX registers is
2956 	     only 12-bits.  While GPRs can handle the full offset range, VSX
2957 	     registers can only handle the restricted range.  */
2958 	  else if ((addr_mask != 0) && !indexed_only_p
2959 		   && msize == 16 && TARGET_P9_VECTOR
2960 		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2961 		       || (m2 == TImode && TARGET_VSX)))
2963 	      addr_mask |= RELOAD_REG_OFFSET;
2964 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2965 		addr_mask |= RELOAD_REG_QUAD_OFFSET;
2968 	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2969 	     addressing on 128-bit types.  */
2970 	  if (rc == RELOAD_REG_VMX && msize == 16
2971 	      && (addr_mask & RELOAD_REG_VALID) != 0)
2972 	    addr_mask |= RELOAD_REG_AND_M16;
2974 	  reg_addr[m].addr_mask[rc] = addr_mask;
2975 	  any_addr_mask |= addr_mask;
/* RELOAD_REG_ANY holds the union of every class's mask for this mode.  */
2978       reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2983 /* Initialize the various global tables that are based on register size. */
2985 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2991 /* Precalculate REGNO_REG_CLASS. */
2992 rs6000_regno_regclass[0] = GENERAL_REGS;
2993 for (r = 1; r < 32; ++r)
2994 rs6000_regno_regclass[r] = BASE_REGS;
2996 for (r = 32; r < 64; ++r)
2997 rs6000_regno_regclass[r] = FLOAT_REGS;
2999 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
3000 rs6000_regno_regclass[r] = NO_REGS;
3002 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3003 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3005 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3006 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3007 rs6000_regno_regclass[r] = CR_REGS;
3009 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3010 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3011 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3012 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3013 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3014 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3015 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3016 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3017 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3018 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3020 /* Precalculate register class to simpler reload register class. We don't
3021 need all of the register classes that are combinations of different
3022 classes, just the simple ones that have constraint letters. */
3023 for (c = 0; c < N_REG_CLASSES; c++)
3024 reg_class_to_reg_type[c] = NO_REG_TYPE;
3026 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3027 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3028 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3029 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3030 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3031 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3032 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3033 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3034 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3035 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3039 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3040 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3044 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3045 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3048 /* Precalculate the valid memory formats as well as the vector information,
3049 this must be set up before the rs6000_hard_regno_nregs_internal calls
3051 gcc_assert ((int)VECTOR_NONE == 0);
3052 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3053 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3055 gcc_assert ((int)CODE_FOR_nothing == 0);
3056 memset ((void *) ®_addr[0], '\0', sizeof (reg_addr));
3058 gcc_assert ((int)NO_REGS == 0);
3059 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3061 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
3062 believes it can use native alignment or still uses 128-bit alignment. */
3063 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3074 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3075 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3076 if (TARGET_FLOAT128_TYPE)
3078 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3079 rs6000_vector_align[KFmode] = 128;
3081 if (FLOAT128_IEEE_P (TFmode))
3083 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3084 rs6000_vector_align[TFmode] = 128;
3088 /* V2DF mode, VSX only. */
3091 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3092 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3093 rs6000_vector_align[V2DFmode] = align64;
3096 /* V4SF mode, either VSX or Altivec. */
3099 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3100 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3101 rs6000_vector_align[V4SFmode] = align32;
3103 else if (TARGET_ALTIVEC)
3105 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3106 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3107 rs6000_vector_align[V4SFmode] = align32;
3110 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3114 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3115 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3116 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3117 rs6000_vector_align[V4SImode] = align32;
3118 rs6000_vector_align[V8HImode] = align32;
3119 rs6000_vector_align[V16QImode] = align32;
3123 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3124 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3125 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3129 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3130 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3131 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3135 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3136 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3139 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3140 rs6000_vector_unit[V2DImode]
3141 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3142 rs6000_vector_align[V2DImode] = align64;
3144 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3145 rs6000_vector_unit[V1TImode]
3146 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3147 rs6000_vector_align[V1TImode] = 128;
3150 /* DFmode, see if we want to use the VSX unit. Memory is handled
3151 differently, so don't set rs6000_vector_mem. */
3154 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3155 rs6000_vector_align[DFmode] = 64;
3158 /* SFmode, see if we want to use the VSX unit. */
3159 if (TARGET_P8_VECTOR)
3161 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3162 rs6000_vector_align[SFmode] = 32;
3165 /* Allow TImode in VSX register and set the VSX memory macros. */
3168 rs6000_vector_mem[TImode] = VECTOR_VSX;
3169 rs6000_vector_align[TImode] = align64;
3172 /* Register class constraints for the constraints that depend on compile
3173 switches. When the VSX code was added, different constraints were added
3174 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3175 of the VSX registers are used. The register classes for scalar floating
3176 point types is set, based on whether we allow that type into the upper
3177 (Altivec) registers. GCC has register classes to target the Altivec
3178 registers for load/store operations, to select using a VSX memory
3179 operation instead of the traditional floating point operation. The
3182 d - Register class to use with traditional DFmode instructions.
3183 f - Register class to use with traditional SFmode instructions.
3184 v - Altivec register.
3185 wa - Any VSX register.
3186 wc - Reserved to represent individual CR bits (used in LLVM).
3187 wd - Preferred register class for V2DFmode.
3188 wf - Preferred register class for V4SFmode.
3189 wg - Float register for power6x move insns.
3190 wh - FP register for direct move instructions.
3191 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3192 wj - FP or VSX register to hold 64-bit integers for direct moves.
3193 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3194 wl - Float register if we can do 32-bit signed int loads.
3195 wm - VSX register for ISA 2.07 direct move operations.
3196 wn - always NO_REGS.
3197 wr - GPR if 64-bit mode is permitted.
3198 ws - Register class to do ISA 2.06 DF operations.
3199 wt - VSX register for TImode in VSX registers.
3200 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3201 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3202 ww - Register class to do SF conversions in with VSX operations.
3203 wx - Float register if we can do 32-bit int stores.
3204 wy - Register class to do ISA 2.07 SF operations.
3205 wz - Float register if we can do 32-bit unsigned int loads.
3206 wH - Altivec register if SImode is allowed in VSX registers.
3207 wI - VSX register if SImode is allowed in VSX registers.
3208 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3209 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3211 if (TARGET_HARD_FLOAT)
3213 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3214 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3219 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3220 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3221 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3222 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3223 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3224 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3225 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3228 /* Add conditional constraints based on various options, to allow us to
3229 collapse multiple insn patterns. */
3231 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3233 if (TARGET_MFPGPR) /* DFmode */
3234 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3237 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3239 if (TARGET_DIRECT_MOVE)
3241 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3242 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3243 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3244 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3245 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3246 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3249 if (TARGET_POWERPC64)
3251 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3252 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3255 if (TARGET_P8_VECTOR) /* SFmode */
3257 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3258 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3259 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3261 else if (TARGET_VSX)
3262 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3265 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3268 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3270 if (TARGET_FLOAT128_TYPE)
3272 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3273 if (FLOAT128_IEEE_P (TFmode))
3274 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3277 if (TARGET_P9_VECTOR)
3279 /* Support for new D-form instructions. */
3280 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3282 /* Support for ISA 3.0 (power9) vectors. */
3283 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3286 /* Support for new direct moves (ISA 3.0 + 64bit). */
3287 if (TARGET_DIRECT_MOVE_128)
3288 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3290 /* Support small integers in VSX registers. */
3291 if (TARGET_P8_VECTOR)
3293 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3294 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3295 if (TARGET_P9_VECTOR)
3297 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3298 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3302 /* Set up the reload helper and direct move functions. */
3303 if (TARGET_VSX || TARGET_ALTIVEC)
3307 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3308 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3309 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3310 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3311 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3312 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3313 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3314 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3315 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3316 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3317 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3318 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3319 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3320 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3321 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3322 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3323 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3324 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3325 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3326 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3328 if (FLOAT128_VECTOR_P (KFmode))
3330 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3331 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3334 if (FLOAT128_VECTOR_P (TFmode))
3336 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3337 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3340 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3342 if (TARGET_NO_SDMODE_STACK)
3344 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3345 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3350 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3351 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3354 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3356 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3357 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3358 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3359 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3360 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3361 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3362 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3363 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3364 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3366 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3367 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3368 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3369 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3370 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3371 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3372 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3373 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3374 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3376 if (FLOAT128_VECTOR_P (KFmode))
3378 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3379 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3382 if (FLOAT128_VECTOR_P (TFmode))
3384 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3385 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3391 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3392 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3393 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3394 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3395 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3396 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3397 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3398 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3399 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3400 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3401 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3402 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3403 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3404 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3405 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3406 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3407 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3408 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3409 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3410 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3412 if (FLOAT128_VECTOR_P (KFmode))
3414 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3415 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3418 if (FLOAT128_IEEE_P (TFmode))
3420 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3421 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3424 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3426 if (TARGET_NO_SDMODE_STACK)
3428 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3429 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3434 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3435 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3438 if (TARGET_DIRECT_MOVE)
3440 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3441 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3442 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3446 reg_addr[DFmode].scalar_in_vmx_p = true;
3447 reg_addr[DImode].scalar_in_vmx_p = true;
3449 if (TARGET_P8_VECTOR)
3451 reg_addr[SFmode].scalar_in_vmx_p = true;
3452 reg_addr[SImode].scalar_in_vmx_p = true;
3454 if (TARGET_P9_VECTOR)
3456 reg_addr[HImode].scalar_in_vmx_p = true;
3457 reg_addr[QImode].scalar_in_vmx_p = true;
3462 /* Precalculate HARD_REGNO_NREGS. */
3463 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3464 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3465 rs6000_hard_regno_nregs[m][r]
3466 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3468 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3469 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3470 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3471 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3472 rs6000_hard_regno_mode_ok_p[m][r] = true;
3474 /* Precalculate CLASS_MAX_NREGS sizes. */
3475 for (c = 0; c < LIM_REG_CLASSES; ++c)
3479 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3480 reg_size = UNITS_PER_VSX_WORD;
3482 else if (c == ALTIVEC_REGS)
3483 reg_size = UNITS_PER_ALTIVEC_WORD;
3485 else if (c == FLOAT_REGS)
3486 reg_size = UNITS_PER_FP_WORD;
3489 reg_size = UNITS_PER_WORD;
3491 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3493 machine_mode m2 = (machine_mode)m;
3494 int reg_size2 = reg_size;
3496 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3498 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3499 reg_size2 = UNITS_PER_FP_WORD;
3501 rs6000_class_max_nregs[m][c]
3502 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3506 /* Calculate which modes to automatically generate code to use the
3507 reciprocal divide and square root instructions. In the future, possibly
3508 automatically generate the instructions even if the user did not specify
3509 -mrecip. The older machines double precision reciprocal sqrt estimate is
3510 not accurate enough. */
3511 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3513 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3515 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3516 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3517 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3518 if (VECTOR_UNIT_VSX_P (V2DFmode))
3519 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3521 if (TARGET_FRSQRTES)
3522 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3524 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3525 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3526 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3527 if (VECTOR_UNIT_VSX_P (V2DFmode))
3528 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3530 if (rs6000_recip_control)
3532 if (!flag_finite_math_only)
3533 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3535 if (flag_trapping_math)
3536 warning (0, "%qs requires %qs or %qs", "-mrecip",
3537 "-fno-trapping-math", "-ffast-math");
3538 if (!flag_reciprocal_math)
3539 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3541 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3543 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3544 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3545 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3547 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3548 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3549 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3551 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3552 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3553 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3555 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3556 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3557 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3559 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3560 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3561 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3563 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3564 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3565 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3567 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3568 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3569 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3571 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3572 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3573 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3577 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3578 legitimate address support to figure out the appropriate addressing to
3580 rs6000_setup_reg_addr_masks ();
3582 if (global_init_p || TARGET_DEBUG_TARGET)
3584 if (TARGET_DEBUG_REG)
3585 rs6000_debug_reg_global ();
3587 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3589 "SImode variable mult cost = %d\n"
3590 "SImode constant mult cost = %d\n"
3591 "SImode short constant mult cost = %d\n"
3592 "DImode multipliciation cost = %d\n"
3593 "SImode division cost = %d\n"
3594 "DImode division cost = %d\n"
3595 "Simple fp operation cost = %d\n"
3596 "DFmode multiplication cost = %d\n"
3597 "SFmode division cost = %d\n"
3598 "DFmode division cost = %d\n"
3599 "cache line size = %d\n"
3600 "l1 cache size = %d\n"
3601 "l2 cache size = %d\n"
3602 "simultaneous prefetches = %d\n"
3605 rs6000_cost->mulsi_const,
3606 rs6000_cost->mulsi_const9,
3614 rs6000_cost->cache_line_size,
3615 rs6000_cost->l1_cache_size,
3616 rs6000_cost->l2_cache_size,
3617 rs6000_cost->simultaneous_prefetches);
3622 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
/* Apply Darwin (Mac OS X)-specific option overrides: force the AltiVec
   ABI and VRSAVE usage, select ABI_DARWIN, promote -m64 to PowerPC64
   (with a warning), and default newer (10.5+) userland targets to
   AltiVec unless the user explicitly chose a CPU or disabled it.
   NOTE(review): this listing omits several original source lines
   (the return type, some condition continuations, and braces), so the
   excerpt below is not a complete definition on its own.  */
3625 darwin_rs6000_override_options (void)
3627 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3629 rs6000_altivec_abi = 1;
3630 TARGET_ALTIVEC_VRSAVE = 1;
3631 rs6000_current_abi = ABI_DARWIN;
/* NOTE(review): the continuation of this condition is missing from the
   listing; presumably it also tests the Darwin version — confirm
   against the full source.  */
3633 if (DEFAULT_ABI == ABI_DARWIN
/* One-byte bool matches the legacy Darwin C++/Objective-C ABI.  */
3635 darwin_one_byte_bool = 1;
3637 if (TARGET_64BIT && ! TARGET_POWERPC64)
/* -m64 requires a 64-bit ISA: turn PowerPC64 on and tell the user.  */
3639 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3640 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
/* NOTE(review): the guard for these two statements (likely the
   flag_mkernel / flag_apple_kext kernel-extension case) is missing
   from this listing — confirm against the full source.  */
3644 rs6000_default_long_calls = 1;
3645 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3648 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3650 if (!flag_mkernel && !flag_apple_kext
3652 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3653 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3655 /* Unless the user (not the configurer) has explicitly overridden
3656 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3657 G4 unless targeting the kernel. */
/* NOTE(review): the leading conjuncts of this condition (kernel /
   version-min availability checks) are missing from this listing.  */
3660 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3661 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3662 && ! global_options_set.x_rs6000_cpu_index)
3664 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3669 /* If not otherwise specified by a target, make 'long double' equivalent to
3672 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3673 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3676 /* Return the builtin mask of the various options used that could affect which
3677 builtins were used. In the past we used target_flags, but we've run out of
3678 bits, and some options are no longer in target_flags. */
/* Each RS6000_BTM_* bit is set iff the corresponding TARGET_* option
   (or, for RS6000_BTM_CELL, the Cell processor selection) is currently
   enabled.  The resulting mask gates which built-in functions the
   compiler will expand for the current target configuration.
   NOTE(review): the return-type line and braces are missing from this
   listing; the visible body is the single return expression.  */
3681 rs6000_builtin_mask_calculate (void)
3683 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3684 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3685 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3686 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3687 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3688 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3689 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3690 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
/* Cell is keyed off the processor choice, not an ISA flag.  */
3691 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3692 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3693 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3694 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3695 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3696 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3697 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3698 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3699 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3700 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3701 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
/* LDBL128 builtins need hard float and the IBM (non-IEEE) 128-bit
   long double format.  */
3702 | ((TARGET_LONG_DOUBLE_128
3703 && TARGET_HARD_FLOAT
3704 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3705 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3706 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3709 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3710 to clobber the XER[CA] bit because clobbering that bit without telling
3711 the compiler worked just fine with versions of GCC before GCC 5, and
3712 breaking a lot of older code in ways that are hard to track down is
3713 not such a great idea. */
/* Records the carry bit (CA_REGNO) as clobbered by every inline asm:
   it is pushed onto the CLOBBERS rtx vector and set in CLOBBERED_REGS.
   The outputs/inputs/constraints parameters are unused here.
   NOTE(review): the return-type line and the trailing return statement
   are absent from this listing.  */
3716 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3717 vec<const char *> &/*constraints*/,
3718 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3720 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3721 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3725 /* Override command line options.
3727 Combine build-specific configuration information with options
3728 specified on the command line to set various state variables which
3729 influence code generation, optimization, and expansion of built-in
3730 functions. Assure that command-line configuration preferences are
3731 compatible with each other and with the build configuration; issue
3732 warnings while adjusting configuration or error messages while
3733 rejecting configuration.
3735 Upon entry to this function:
3737 This function is called once at the beginning of
3738 compilation, and then again at the start and end of compiling
3739 each section of code that has a different configuration, as
3740 indicated, for example, by adding the
3742 __attribute__((__target__("cpu=power9")))
3744 qualifier to a function definition or, for example, by bracketing
3747 #pragma GCC target("altivec")
3751 #pragma GCC reset_options
3753 directives. Parameter global_init_p is true for the initial
3754 invocation, which initializes global variables, and false for all
3755 subsequent invocations.
3758 Various global state information is assumed to be valid. This
3759 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3760 default CPU specified at build configure time, TARGET_DEFAULT,
3761 representing the default set of option flags for the default
3762 target, and global_options_set.x_rs6000_isa_flags, representing
3763 which options were requested on the command line.
3765 Upon return from this function:
3767 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3768 was set by name on the command line. Additionally, if certain
3769 attributes are automatically enabled or disabled by this function
3770 in order to assure compatibility between options and
3771 configuration, the flags associated with those attributes are
3772 also set. By setting these "explicit bits", we avoid the risk
3773 that other code might accidentally overwrite these particular
3774 attributes with "default values".
3776 The various bits of rs6000_isa_flags are set to indicate the
3777 target options that have been selected for the most current
3778 compilation efforts. This has the effect of also turning on the
3779 associated TARGET_XXX values since these are macros which are
3780 generally defined to test the corresponding bit of the
3781 rs6000_isa_flags variable.
3783 The variable rs6000_builtin_mask is set to represent the target
3784 options for the most current compilation efforts, consistent with
3785 the current contents of rs6000_isa_flags. This variable controls
3786 expansion of built-in functions.
3788 Various other global variables and fields of global structures
3789 (over 50 in all) are initialized to reflect the desired options
3790 for the most current compilation efforts. */
3793 rs6000_option_override_internal (bool global_init_p)
3797 HOST_WIDE_INT set_masks;
3798 HOST_WIDE_INT ignore_masks;
3801 struct cl_target_option *main_target_opt
3802 = ((global_init_p || target_option_default_node == NULL)
3803 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3805 /* Print defaults. */
3806 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3807 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3809 /* Remember the explicit arguments. */
3811 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3813 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3814 library functions, so warn about it. The flag may be useful for
3815 performance studies from time to time though, so don't disable it
3817 if (global_options_set.x_rs6000_alignment_flags
3818 && rs6000_alignment_flags == MASK_ALIGN_POWER
3819 && DEFAULT_ABI == ABI_DARWIN
3821 warning (0, "%qs is not supported for 64-bit Darwin;"
3822 " it is incompatible with the installed C and C++ libraries",
3825 /* Numerous experiments show that IRA based loop pressure
3826 calculation works better for RTL loop invariant motion on targets
3827 with enough (>= 32) registers. It is an expensive optimization.
3828 So it is on only for peak performance. */
3829 if (optimize >= 3 && global_init_p
3830 && !global_options_set.x_flag_ira_loop_pressure)
3831 flag_ira_loop_pressure = 1;
3833 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3834 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3835 options were already specified. */
3836 if (flag_sanitize & SANITIZE_USER_ADDRESS
3837 && !global_options_set.x_flag_asynchronous_unwind_tables)
3838 flag_asynchronous_unwind_tables = 1;
3840 /* Set the pointer size. */
3843 rs6000_pmode = DImode;
3844 rs6000_pointer_size = 64;
3848 rs6000_pmode = SImode;
3849 rs6000_pointer_size = 32;
3852 /* Some OSs don't support saving the high part of 64-bit registers on context
3853 switch. Other OSs don't support saving Altivec registers. On those OSs,
3854 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3855 if the user wants either, the user must explicitly specify them and we
3856 won't interfere with the user's specification. */
3858 set_masks = POWERPC_MASKS;
3859 #ifdef OS_MISSING_POWERPC64
3860 if (OS_MISSING_POWERPC64)
3861 set_masks &= ~OPTION_MASK_POWERPC64;
3863 #ifdef OS_MISSING_ALTIVEC
3864 if (OS_MISSING_ALTIVEC)
3865 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3866 | OTHER_VSX_VECTOR_MASKS);
3869 /* Don't override by the processor default if given explicitly. */
3870 set_masks &= ~rs6000_isa_flags_explicit;
3872 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3873 the cpu in a target attribute or pragma, but did not specify a tuning
3874 option, use the cpu for the tuning option rather than the option specified
3875 with -mtune on the command line. Process a '--with-cpu' configuration
3876 request as an implicit --cpu. */
3877 if (rs6000_cpu_index >= 0)
3878 cpu_index = rs6000_cpu_index;
3879 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3880 cpu_index = main_target_opt->x_rs6000_cpu_index;
3881 else if (OPTION_TARGET_CPU_DEFAULT)
3882 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3884 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3885 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3886 with those from the cpu, except for options that were explicitly set. If
3887 we don't have a cpu, do not override the target bits set in
3891 rs6000_cpu_index = cpu_index;
3892 rs6000_isa_flags &= ~set_masks;
3893 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3898 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3899 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3900 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3901 to using rs6000_isa_flags, we need to do the initialization here.
3903 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3904 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3905 HOST_WIDE_INT flags;
3907 flags = TARGET_DEFAULT;
3910 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3911 const char *default_cpu = (!TARGET_POWERPC64
3916 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3917 flags = processor_target_table[default_cpu_index].target_enable;
3919 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3922 if (rs6000_tune_index >= 0)
3923 tune_index = rs6000_tune_index;
3924 else if (cpu_index >= 0)
3925 rs6000_tune_index = tune_index = cpu_index;
3929 enum processor_type tune_proc
3930 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3933 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3934 if (processor_target_table[i].processor == tune_proc)
3942 rs6000_cpu = processor_target_table[cpu_index].processor;
3944 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3946 gcc_assert (tune_index >= 0);
3947 rs6000_tune = processor_target_table[tune_index].processor;
3949 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3950 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3951 || rs6000_cpu == PROCESSOR_PPCE5500)
3954 error ("AltiVec not supported in this target");
3957 /* If we are optimizing big endian systems for space, use the load/store
3958 multiple instructions. */
3959 if (BYTES_BIG_ENDIAN && optimize_size)
3960 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3962 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3963 because the hardware doesn't support the instructions used in little
3964 endian mode, and causes an alignment trap. The 750 does not cause an
3965 alignment trap (except when the target is unaligned). */
3967 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3969 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3970 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3971 warning (0, "%qs is not supported on little endian systems",
3975 /* If little-endian, default to -mstrict-align on older processors.
3976 Testing for htm matches power8 and later. */
3977 if (!BYTES_BIG_ENDIAN
3978 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3979 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3981 if (!rs6000_fold_gimple)
3983 "gimple folding of rs6000 builtins has been disabled.\n");
3985 /* Add some warnings for VSX. */
3988 const char *msg = NULL;
3989 if (!TARGET_HARD_FLOAT)
3991 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3992 msg = N_("-mvsx requires hardware floating point");
3995 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3996 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3999 else if (TARGET_AVOID_XFORM > 0)
4000 msg = N_("-mvsx needs indexed addressing");
4001 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4002 & OPTION_MASK_ALTIVEC))
4004 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4005 msg = N_("-mvsx and -mno-altivec are incompatible");
4007 msg = N_("-mno-altivec disables vsx");
4013 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4014 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4018 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4019 the -mcpu setting to enable options that conflict. */
4020 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4021 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4022 | OPTION_MASK_ALTIVEC
4023 | OPTION_MASK_VSX)) != 0)
4024 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4025 | OPTION_MASK_DIRECT_MOVE)
4026 & ~rs6000_isa_flags_explicit);
4028 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4029 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4031 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4032 off all of the options that depend on those flags. */
4033 ignore_masks = rs6000_disable_incompatible_switches ();
4035 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4036 unless the user explicitly used the -mno-<option> to disable the code. */
4037 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4038 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4039 else if (TARGET_P9_MINMAX)
4043 if (cpu_index == PROCESSOR_POWER9)
4045 /* legacy behavior: allow -mcpu=power9 with certain
4046 capabilities explicitly disabled. */
4047 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4050 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4051 "for <xxx> less than power9", "-mcpu");
4053 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4054 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4055 & rs6000_isa_flags_explicit))
4056 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4057 were explicitly cleared. */
4058 error ("%qs incompatible with explicitly disabled options",
4061 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4063 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4064 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4065 else if (TARGET_VSX)
4066 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4067 else if (TARGET_POPCNTD)
4068 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4069 else if (TARGET_DFP)
4070 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4071 else if (TARGET_CMPB)
4072 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4073 else if (TARGET_FPRND)
4074 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4075 else if (TARGET_POPCNTB)
4076 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4077 else if (TARGET_ALTIVEC)
4078 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4080 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4082 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4083 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4084 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4087 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4089 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4090 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4091 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4094 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4096 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4097 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4098 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4101 if (TARGET_P8_VECTOR && !TARGET_VSX)
4103 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4104 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4105 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4106 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4108 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4109 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4110 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4114 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4116 rs6000_isa_flags |= OPTION_MASK_VSX;
4117 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4121 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4123 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4124 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4125 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4128 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4129 silently turn off quad memory mode. */
4130 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4132 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4133 warning (0, N_("-mquad-memory requires 64-bit mode"));
4135 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4136 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4138 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4139 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4142 /* Non-atomic quad memory load/store are disabled for little endian, since
4143 the words are reversed, but atomic operations can still be done by
4144 swapping the words. */
4145 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4147 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4148 warning (0, N_("-mquad-memory is not available in little endian "
4151 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4154 /* Assume if the user asked for normal quad memory instructions, they want
4155 the atomic versions as well, unless they explicitly told us not to use quad
4156 word atomic instructions. */
4157 if (TARGET_QUAD_MEMORY
4158 && !TARGET_QUAD_MEMORY_ATOMIC
4159 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4160 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4162 /* If we can shrink-wrap the TOC register save separately, then use
4163 -msave-toc-indirect unless explicitly disabled. */
4164 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4165 && flag_shrink_wrap_separate
4166 && optimize_function_for_speed_p (cfun))
4167 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4169 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4170 generating power8 instructions. Power9 does not optimize power8 fusion
4172 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4174 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4175 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4177 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4180 /* Setting additional fusion flags turns on base fusion. */
4181 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4183 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4185 if (TARGET_P8_FUSION_SIGN)
4186 error ("%qs requires %qs", "-mpower8-fusion-sign",
4189 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4192 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4195 /* Power8 does not fuse sign extended loads with the addis. If we are
4196 optimizing at high levels for speed, convert a sign extended load into a
4197 zero extending load, and an explicit sign extension. */
4198 if (TARGET_P8_FUSION
4199 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4200 && optimize_function_for_speed_p (cfun)
4202 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4204 /* ISA 3.0 vector instructions include ISA 2.07. */
4205 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4207 /* We prefer to not mention undocumented options in
4208 error messages. However, if users have managed to select
4209 power9-vector without selecting power8-vector, they
4210 already know about undocumented flags. */
4211 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4212 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4213 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4214 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4216 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4217 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4218 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4222 /* OPTION_MASK_P9_VECTOR is explicit and
4223 OPTION_MASK_P8_VECTOR is not explicit. */
4224 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4225 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4229 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4230 support. If we only have ISA 2.06 support, and the user did not specify
4231 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4232 but we don't enable the full vectorization support */
4233 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4234 TARGET_ALLOW_MOVMISALIGN = 1;
4236 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4238 if (TARGET_ALLOW_MOVMISALIGN > 0
4239 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4240 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4242 TARGET_ALLOW_MOVMISALIGN = 0;
4245 /* Determine when unaligned vector accesses are permitted, and when
4246 they are preferred over masked Altivec loads. Note that if
4247 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4248 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4250 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4254 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4255 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4257 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4260 else if (!TARGET_ALLOW_MOVMISALIGN)
4262 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4263 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4264 "-mallow-movmisalign");
4266 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4270 /* Use long double size to select the appropriate long double. We use
4271 TYPE_PRECISION to differentiate the 3 different long double types. We map
4272 128 into the precision used for TFmode. */
4273 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4275 : FLOAT_PRECISION_TFmode);
4277 /* Set long double size before the IEEE 128-bit tests. */
4278 if (!global_options_set.x_rs6000_long_double_type_size)
4280 if (main_target_opt != NULL
4281 && (main_target_opt->x_rs6000_long_double_type_size
4282 != default_long_double_size))
4283 error ("target attribute or pragma changes long double size");
4285 rs6000_long_double_type_size = default_long_double_size;
4287 else if (rs6000_long_double_type_size == 128)
4288 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4289 else if (global_options_set.x_rs6000_ieeequad)
4291 if (global_options.x_rs6000_ieeequad)
4292 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4294 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4297 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4298 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4299 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4300 those systems will not pick up this default. Warn if the user changes the
4301 default unless -Wno-psabi. */
4302 if (!global_options_set.x_rs6000_ieeequad)
4303 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4307 if (global_options.x_rs6000_ieeequad
4308 && (!TARGET_POPCNTD || !TARGET_VSX))
4309 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4311 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4313 static bool warned_change_long_double;
4314 if (!warned_change_long_double)
4316 warned_change_long_double = true;
4317 if (TARGET_IEEEQUAD)
4318 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4320 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4325 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4326 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4327 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4328 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4329 the keyword as well as the type. */
4330 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4332 /* IEEE 128-bit floating point requires VSX support. */
4333 if (TARGET_FLOAT128_KEYWORD)
4337 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4338 error ("%qs requires VSX support", "-mfloat128");
4340 TARGET_FLOAT128_TYPE = 0;
4341 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4342 | OPTION_MASK_FLOAT128_HW);
4344 else if (!TARGET_FLOAT128_TYPE)
4346 TARGET_FLOAT128_TYPE = 1;
4347 warning (0, "The -mfloat128 option may not be fully supported");
4351 /* Enable the __float128 keyword under Linux by default. */
4352 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4353 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4354 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4356 /* If we are supporting the float128 type and full ISA 3.0 support,
4357 enable -mfloat128-hardware by default. However, don't enable the
4358 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4359 because sometimes the compiler wants to put things in an integer
4360 container, and if we don't have __int128 support, it is impossible. */
4361 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4362 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4363 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4364 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4366 if (TARGET_FLOAT128_HW
4367 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4369 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4370 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4372 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4375 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4377 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4378 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4380 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4383 /* Print the options after updating the defaults. */
4384 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4385 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4387 /* E500mc does "better" if we inline more aggressively. Respect the
4388 user's opinion, though. */
4389 if (rs6000_block_move_inline_limit == 0
4390 && (rs6000_tune == PROCESSOR_PPCE500MC
4391 || rs6000_tune == PROCESSOR_PPCE500MC64
4392 || rs6000_tune == PROCESSOR_PPCE5500
4393 || rs6000_tune == PROCESSOR_PPCE6500))
4394 rs6000_block_move_inline_limit = 128;
4396 /* store_one_arg depends on expand_block_move to handle at least the
4397 size of reg_parm_stack_space. */
4398 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4399 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4403 /* If the appropriate debug option is enabled, replace the target hooks
4404 with debug versions that call the real version and then prints
4405 debugging information. */
4406 if (TARGET_DEBUG_COST)
4408 targetm.rtx_costs = rs6000_debug_rtx_costs;
4409 targetm.address_cost = rs6000_debug_address_cost;
4410 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4413 if (TARGET_DEBUG_ADDR)
4415 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4416 targetm.legitimize_address = rs6000_debug_legitimize_address;
4417 rs6000_secondary_reload_class_ptr
4418 = rs6000_debug_secondary_reload_class;
4419 targetm.secondary_memory_needed
4420 = rs6000_debug_secondary_memory_needed;
4421 targetm.can_change_mode_class
4422 = rs6000_debug_can_change_mode_class;
4423 rs6000_preferred_reload_class_ptr
4424 = rs6000_debug_preferred_reload_class;
4425 rs6000_legitimize_reload_address_ptr
4426 = rs6000_debug_legitimize_reload_address;
4427 rs6000_mode_dependent_address_ptr
4428 = rs6000_debug_mode_dependent_address;
4431 if (rs6000_veclibabi_name)
4433 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4434 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4437 error ("unknown vectorization library ABI type (%qs) for "
4438 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4444 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4445 target attribute or pragma which automatically enables both options,
4446 unless the altivec ABI was set. This is set by default for 64-bit, but
4448 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4450 TARGET_FLOAT128_TYPE = 0;
4451 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4452 | OPTION_MASK_FLOAT128_KEYWORD)
4453 & ~rs6000_isa_flags_explicit);
4456 /* Enable Altivec ABI for AIX -maltivec. */
4457 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4459 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4460 error ("target attribute or pragma changes AltiVec ABI");
4462 rs6000_altivec_abi = 1;
4465 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4466 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4467 be explicitly overridden in either case. */
4470 if (!global_options_set.x_rs6000_altivec_abi
4471 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4473 if (main_target_opt != NULL &&
4474 !main_target_opt->x_rs6000_altivec_abi)
4475 error ("target attribute or pragma changes AltiVec ABI");
4477 rs6000_altivec_abi = 1;
4481 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4482 So far, the only darwin64 targets are also MACH-O. */
4484 && DEFAULT_ABI == ABI_DARWIN
4487 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4488 error ("target attribute or pragma changes darwin64 ABI");
4491 rs6000_darwin64_abi = 1;
4492 /* Default to natural alignment, for better performance. */
4493 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4497 /* Place FP constants in the constant pool instead of TOC
4498 if section anchors enabled. */
4499 if (flag_section_anchors
4500 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4501 TARGET_NO_FP_IN_TOC = 1;
4503 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4504 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4506 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4507 SUBTARGET_OVERRIDE_OPTIONS;
4509 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4510 SUBSUBTARGET_OVERRIDE_OPTIONS;
4512 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4513 SUB3TARGET_OVERRIDE_OPTIONS;
4516 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4517 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4519 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4520 && rs6000_tune != PROCESSOR_POWER5
4521 && rs6000_tune != PROCESSOR_POWER6
4522 && rs6000_tune != PROCESSOR_POWER7
4523 && rs6000_tune != PROCESSOR_POWER8
4524 && rs6000_tune != PROCESSOR_POWER9
4525 && rs6000_tune != PROCESSOR_PPCA2
4526 && rs6000_tune != PROCESSOR_CELL
4527 && rs6000_tune != PROCESSOR_PPC476);
4528 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4529 || rs6000_tune == PROCESSOR_POWER5
4530 || rs6000_tune == PROCESSOR_POWER7
4531 || rs6000_tune == PROCESSOR_POWER8);
4532 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4533 || rs6000_tune == PROCESSOR_POWER5
4534 || rs6000_tune == PROCESSOR_POWER6
4535 || rs6000_tune == PROCESSOR_POWER7
4536 || rs6000_tune == PROCESSOR_POWER8
4537 || rs6000_tune == PROCESSOR_POWER9
4538 || rs6000_tune == PROCESSOR_PPCE500MC
4539 || rs6000_tune == PROCESSOR_PPCE500MC64
4540 || rs6000_tune == PROCESSOR_PPCE5500
4541 || rs6000_tune == PROCESSOR_PPCE6500);
4543 /* Allow debug switches to override the above settings. These are set to -1
4544 in rs6000.opt to indicate the user hasn't directly set the switch. */
4545 if (TARGET_ALWAYS_HINT >= 0)
4546 rs6000_always_hint = TARGET_ALWAYS_HINT;
4548 if (TARGET_SCHED_GROUPS >= 0)
4549 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4551 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4552 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4554 rs6000_sched_restricted_insns_priority
4555 = (rs6000_sched_groups ? 1 : 0);
4557 /* Handle -msched-costly-dep option. */
4558 rs6000_sched_costly_dep
4559 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4561 if (rs6000_sched_costly_dep_str)
4563 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4564 rs6000_sched_costly_dep = no_dep_costly;
4565 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4566 rs6000_sched_costly_dep = all_deps_costly;
4567 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4568 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4569 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4570 rs6000_sched_costly_dep = store_to_load_dep_costly;
4572 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4573 atoi (rs6000_sched_costly_dep_str));
4576 /* Handle -minsert-sched-nops option. */
4577 rs6000_sched_insert_nops
4578 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4580 if (rs6000_sched_insert_nops_str)
4582 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4583 rs6000_sched_insert_nops = sched_finish_none;
4584 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4585 rs6000_sched_insert_nops = sched_finish_pad_groups;
4586 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4587 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4589 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4590 atoi (rs6000_sched_insert_nops_str));
4593 /* Handle stack protector */
4594 if (!global_options_set.x_rs6000_stack_protector_guard)
4595 #ifdef TARGET_THREAD_SSP_OFFSET
4596 rs6000_stack_protector_guard = SSP_TLS;
4598 rs6000_stack_protector_guard = SSP_GLOBAL;
4601 #ifdef TARGET_THREAD_SSP_OFFSET
4602 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4603 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4606 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4609 const char *str = rs6000_stack_protector_guard_offset_str;
4612 long offset = strtol (str, &endp, 0);
4613 if (!*str || *endp || errno)
4614 error ("%qs is not a valid number in %qs", str,
4615 "-mstack-protector-guard-offset=");
4617 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4618 || (TARGET_64BIT && (offset & 3)))
4619 error ("%qs is not a valid offset in %qs", str,
4620 "-mstack-protector-guard-offset=");
4622 rs6000_stack_protector_guard_offset = offset;
4625 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4627 const char *str = rs6000_stack_protector_guard_reg_str;
4628 int reg = decode_reg_name (str);
4630 if (!IN_RANGE (reg, 1, 31))
4631 error ("%qs is not a valid base register in %qs", str,
4632 "-mstack-protector-guard-reg=");
4634 rs6000_stack_protector_guard_reg = reg;
4637 if (rs6000_stack_protector_guard == SSP_TLS
4638 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4639 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4643 #ifdef TARGET_REGNAMES
4644 /* If the user desires alternate register names, copy in the
4645 alternate names now. */
4646 if (TARGET_REGNAMES)
4647 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4650 /* Set aix_struct_return last, after the ABI is determined.
4651 If -maix-struct-return or -msvr4-struct-return was explicitly
4652 used, don't override with the ABI default. */
4653 if (!global_options_set.x_aix_struct_return)
4654 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4657 /* IBM XL compiler defaults to unsigned bitfields. */
4658 if (TARGET_XL_COMPAT)
4659 flag_signed_bitfields = 0;
4662 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4663 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4665 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4667 /* We can only guarantee the availability of DI pseudo-ops when
4668 assembling for 64-bit targets. */
4671 targetm.asm_out.aligned_op.di = NULL;
4672 targetm.asm_out.unaligned_op.di = NULL;
4676 /* Set branch target alignment, if not optimizing for size. */
4679 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4680 aligned 8byte to avoid misprediction by the branch predictor. */
4681 if (rs6000_tune == PROCESSOR_TITAN
4682 || rs6000_tune == PROCESSOR_CELL)
4684 if (flag_align_functions && !str_align_functions)
4685 str_align_functions = "8";
4686 if (flag_align_jumps && !str_align_jumps)
4687 str_align_jumps = "8";
4688 if (flag_align_loops && !str_align_loops)
4689 str_align_loops = "8";
4691 if (rs6000_align_branch_targets)
4693 if (flag_align_functions && !str_align_functions)
4694 str_align_functions = "16";
4695 if (flag_align_jumps && !str_align_jumps)
4696 str_align_jumps = "16";
4697 if (flag_align_loops && !str_align_loops)
4699 can_override_loop_align = 1;
4700 str_align_loops = "16";
4704 if (flag_align_jumps && !str_align_jumps)
4705 str_align_jumps = "16";
4706 if (flag_align_loops && !str_align_loops)
4707 str_align_loops = "16";
4710 /* Arrange to save and restore machine status around nested functions. */
4711 init_machine_status = rs6000_init_machine_status;
4713 /* We should always be splitting complex arguments, but we can't break
4714 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4715 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4716 targetm.calls.split_complex_arg = NULL;
4718 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4719 if (DEFAULT_ABI == ABI_AIX)
4720 targetm.calls.custom_function_descriptors = 0;
4723 /* Initialize rs6000_cost with the appropriate target costs. */
4725 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4727 switch (rs6000_tune)
4729 case PROCESSOR_RS64A:
4730 rs6000_cost = &rs64a_cost;
4733 case PROCESSOR_MPCCORE:
4734 rs6000_cost = &mpccore_cost;
4737 case PROCESSOR_PPC403:
4738 rs6000_cost = &ppc403_cost;
4741 case PROCESSOR_PPC405:
4742 rs6000_cost = &ppc405_cost;
4745 case PROCESSOR_PPC440:
4746 rs6000_cost = &ppc440_cost;
4749 case PROCESSOR_PPC476:
4750 rs6000_cost = &ppc476_cost;
4753 case PROCESSOR_PPC601:
4754 rs6000_cost = &ppc601_cost;
4757 case PROCESSOR_PPC603:
4758 rs6000_cost = &ppc603_cost;
4761 case PROCESSOR_PPC604:
4762 rs6000_cost = &ppc604_cost;
4765 case PROCESSOR_PPC604e:
4766 rs6000_cost = &ppc604e_cost;
4769 case PROCESSOR_PPC620:
4770 rs6000_cost = &ppc620_cost;
4773 case PROCESSOR_PPC630:
4774 rs6000_cost = &ppc630_cost;
4777 case PROCESSOR_CELL:
4778 rs6000_cost = &ppccell_cost;
4781 case PROCESSOR_PPC750:
4782 case PROCESSOR_PPC7400:
4783 rs6000_cost = &ppc750_cost;
4786 case PROCESSOR_PPC7450:
4787 rs6000_cost = &ppc7450_cost;
4790 case PROCESSOR_PPC8540:
4791 case PROCESSOR_PPC8548:
4792 rs6000_cost = &ppc8540_cost;
4795 case PROCESSOR_PPCE300C2:
4796 case PROCESSOR_PPCE300C3:
4797 rs6000_cost = &ppce300c2c3_cost;
4800 case PROCESSOR_PPCE500MC:
4801 rs6000_cost = &ppce500mc_cost;
4804 case PROCESSOR_PPCE500MC64:
4805 rs6000_cost = &ppce500mc64_cost;
4808 case PROCESSOR_PPCE5500:
4809 rs6000_cost = &ppce5500_cost;
4812 case PROCESSOR_PPCE6500:
4813 rs6000_cost = &ppce6500_cost;
4816 case PROCESSOR_TITAN:
4817 rs6000_cost = &titan_cost;
4820 case PROCESSOR_POWER4:
4821 case PROCESSOR_POWER5:
4822 rs6000_cost = &power4_cost;
4825 case PROCESSOR_POWER6:
4826 rs6000_cost = &power6_cost;
4829 case PROCESSOR_POWER7:
4830 rs6000_cost = &power7_cost;
4833 case PROCESSOR_POWER8:
4834 rs6000_cost = &power8_cost;
4837 case PROCESSOR_POWER9:
4838 rs6000_cost = &power9_cost;
4841 case PROCESSOR_PPCA2:
4842 rs6000_cost = &ppca2_cost;
4851 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4852 rs6000_cost->simultaneous_prefetches,
4853 global_options.x_param_values,
4854 global_options_set.x_param_values);
4855 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4856 global_options.x_param_values,
4857 global_options_set.x_param_values);
4858 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4859 rs6000_cost->cache_line_size,
4860 global_options.x_param_values,
4861 global_options_set.x_param_values);
4862 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4863 global_options.x_param_values,
4864 global_options_set.x_param_values);
4866 /* Increase loop peeling limits based on performance analysis. */
4867 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4868 global_options.x_param_values,
4869 global_options_set.x_param_values);
4870 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4871 global_options.x_param_values,
4872 global_options_set.x_param_values);
4874 /* Use the 'model' -fsched-pressure algorithm by default. */
4875 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4876 SCHED_PRESSURE_MODEL,
4877 global_options.x_param_values,
4878 global_options_set.x_param_values);
4880 /* If using typedef char *va_list, signal that
4881 __builtin_va_start (&ap, 0) can be optimized to
4882 ap = __builtin_next_arg (0). */
4883 if (DEFAULT_ABI != ABI_V4)
4884 targetm.expand_builtin_va_start = NULL;
4887 /* If not explicitly specified via option, decide whether to generate indexed
4888 load/store instructions. A value of -1 indicates that the
4889 initial value of this variable has not been overwritten. During
4890 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4891 if (TARGET_AVOID_XFORM == -1)
4892 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4893 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4894 need indexed accesses and the type used is the scalar type of the element
4895 being loaded or stored. */
4896 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4897 && !TARGET_ALTIVEC);
4899 /* Set the -mrecip options. */
4900 if (rs6000_recip_name)
4902 char *p = ASTRDUP (rs6000_recip_name);
4904 unsigned int mask, i;
4907 while ((q = strtok (p, ",")) != NULL)
4918 if (!strcmp (q, "default"))
4919 mask = ((TARGET_RECIP_PRECISION)
4920 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4923 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4924 if (!strcmp (q, recip_options[i].string))
4926 mask = recip_options[i].mask;
4930 if (i == ARRAY_SIZE (recip_options))
4932 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4940 rs6000_recip_control &= ~mask;
4942 rs6000_recip_control |= mask;
4946 /* Set the builtin mask of the various options used that could affect which
4947 builtins were used. In the past we used target_flags, but we've run out
4948 of bits, and some options are no longer in target_flags. */
4949 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4950 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4951 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4952 rs6000_builtin_mask);
4954 /* Initialize all of the registers. */
4955 rs6000_init_hard_regno_mode_ok (global_init_p);
4957 /* Save the initial options in case the user does function specific options */
4959 target_option_default_node = target_option_current_node
4960 = build_target_option_node (&global_options);
4962 /* If not explicitly specified via option, decide whether to generate the
4963 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4964 if (TARGET_LINK_STACK == -1)
4965 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4967 /* Deprecate use of -mno-speculate-indirect-jumps. */
4968 if (!rs6000_speculate_indirect_jumps)
4969 warning (0, "%qs is deprecated and not recommended in any circumstances",
4970 "-mno-speculate-indirect-jumps");
4975 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4976 define the target cpu type. */
4979 rs6000_option_override (void)
/* All of the work is delegated to the shared worker; its boolean result
   is intentionally discarded here.  NOTE(review): the argument appears to
   select a full/global initialization pass (as opposed to the
   function-specific option handling mentioned near the end of the worker)
   -- confirm against the worker's definition.  */
4981 (void) rs6000_option_override_internal (true);
4985 /* Implement targetm.vectorize.builtin_mask_for_load. */
4987 rs6000_builtin_mask_for_load (void)
4989 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
/* Expose the mask-for-load builtin only on configurations where
   unaligned vector loads need the lvsl/vperm realignment sequence:
   plain AltiVec without VSX, or VSX without efficient unaligned
   access support.  */
4990 if ((TARGET_ALTIVEC && !TARGET_VSX)
4991 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4992 return altivec_builtin_mask_for_load;
4997 /* Implement LOOP_ALIGN. */
4999 rs6000_loop_align (rtx label)
5004 /* Don't override loop alignment if -falign-loops was specified. */
5005 if (!can_override_loop_align)
/* Locate the loop containing LABEL and count the insns in it.  */
5008 bb = BLOCK_FOR_INSN (label);
5009 ninsns = num_loop_insns(bb->loop_father);
5011 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5012 if (ninsns > 4 && ninsns <= 8
5013 && (rs6000_tune == PROCESSOR_POWER4
5014 || rs6000_tune == PROCESSOR_POWER5
5015 || rs6000_tune == PROCESSOR_POWER6
5016 || rs6000_tune == PROCESSOR_POWER7
5017 || rs6000_tune == PROCESSOR_POWER8))
/* align_flags (5) requests 2**5 == 32-byte alignment, matching the
   icache-sector comment above.  */
5018 return align_flags (5);
5023 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5024 after applying N number of iterations.  This routine does not determine
5025 how many iterations are required to reach desired alignment. */
5028 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
/* Under -malign-natural, elements are naturally aligned, so peeling
   can reach full vector alignment (unless the access is packed).  */
5035 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
/* NOTE(review): the branch bodies for the two alignment modes are not
   visible in this chunk; the MASK_ALIGN_POWER case applies different
   rules -- confirm against the full source.  */
5038 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5048 /* Assuming that all other types are naturally aligned. CHECKME! */
5053 /* Return true if the vector misalignment factor is supported by the
5056 rs6000_builtin_support_vector_misalignment (machine_mode mode,
/* Hardware with efficient unaligned VSX accesses can handle any
   misalignment directly.  */
5063 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5066 /* Return if movmisalign pattern is not supported for this mode. */
5067 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
/* A misalignment of -1 means the factor is unknown at compile time.  */
5070 if (misalignment == -1)
5072 /* Misalignment factor is unknown at compile time but we know
5073 it's word aligned. */
5074 if (rs6000_vector_alignment_reachable (type, is_packed))
5076 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
/* 32- and 64-bit elements keep a word-aligned reference word aligned,
   which the word-alignment check below accepts for VSX.  */
5078 if (element_size == 64 || element_size == 32)
5085 /* VSX supports word-aligned vector. */
5086 if (misalignment % 4 == 0)
5092 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Returns an integer cost for one statement of kind TYPE_OF_COST operating on
   VECTYPE with the given MISALIGN.  Many of the per-case return values fall on
   lines not visible in this fragment; only the structure is shown.  */
5094 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5095 tree vectype, int misalign)
5100 switch (type_of_cost)
5110 case cond_branch_not_taken:
5119 case vec_promote_demote:
5125 case cond_branch_taken:
5128 case unaligned_load:
5129 case vector_gather_load:
5130 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5133 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5135 elements = TYPE_VECTOR_SUBPARTS (vectype);
5137 /* Double word aligned. */
5145 /* Double word aligned. */
5149 /* Unknown misalignment. */
5162 /* Misaligned loads are not supported. */
5167 case unaligned_store:
5168 case vector_scatter_store:
5169 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5172 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5174 elements = TYPE_VECTOR_SUBPARTS (vectype);
5176 /* Double word aligned. */
5184 /* Double word aligned. */
5188 /* Unknown misalignment. */
5201 /* Misaligned stores are not supported. */
5207 /* This is a rough approximation assuming non-constant elements
5208 constructed into a vector via element insertion. FIXME:
5209 vec_construct is not granular enough for uniformly good
5210 decisions. If the initialization is a splat, this is
5211 cheaper than we estimate. Improve this someday. */
5212 elem_type = TREE_TYPE (vectype);
5213 /* 32-bit vectors loaded into registers are stored as double
5214 precision, so we need 2 permutes, 2 converts, and 1 merge
5215 to construct a vector of short floats from them. */
5216 if (SCALAR_FLOAT_TYPE_P (elem_type)
5217 && TYPE_PRECISION (elem_type) == 32)
5219 /* On POWER9, integer vector types are built up in GPRs and then
5220 use a direct move (2 cycles). For POWER8 this is even worse,
5221 as we need two direct moves and a merge, and the direct moves
5223 else if (INTEGRAL_TYPE_P (elem_type))
/* Cost model: one insert per element beyond the first, plus the fixed
   move overhead (2 on POWER9, 5 on earlier CPUs per the comment above).  */
5225 if (TARGET_P9_VECTOR)
5226 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5228 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5231 /* V2DFmode doesn't need a direct move. */
5239 /* Implement targetm.vectorize.preferred_simd_mode. */
/* Maps a scalar MODE to the preferred vector mode; the per-mode mapping and
   the fallback return are on lines not visible in this fragment.  */
5242 rs6000_preferred_simd_mode (scalar_mode mode)
5251 if (TARGET_ALTIVEC || TARGET_VSX)
/* Per-loop (or per-BB, when loop_info is NULL) cost accumulator used by the
   vectorizer cost hooks below; the cost[] array member is declared on a line
   not visible in this fragment.  */
5271 typedef struct _rs6000_cost_data
5273 struct loop *loop_info;
5277 /* Test for likely overcommitment of vector hardware resources. If a
5278 loop iteration is relatively large, and too large a percentage of
5279 instructions in the loop are vectorized, the cost model may not
5280 adequately reflect delays from unavailable vector resources.
5281 Penalize the loop body cost for this case. */
5284 rs6000_density_test (rs6000_cost_data *data)
5286 const int DENSITY_PCT_THRESHOLD = 85;
5287 const int DENSITY_SIZE_THRESHOLD = 70;
5288 const int DENSITY_PENALTY = 10;
5289 struct loop *loop = data->loop_info;
5290 basic_block *bbs = get_loop_body (loop);
5291 int nbbs = loop->num_nodes;
5292 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5293 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
/* Walk every stmt of every BB in the loop, counting statements that are
   not part of the vectorization (the increment of not_vec_cost is on a
   line not visible in this fragment).  */
5296 for (i = 0; i < nbbs; i++)
5298 basic_block bb = bbs[i];
5299 gimple_stmt_iterator gsi;
5301 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5303 gimple *stmt = gsi_stmt (gsi);
5304 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5306 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5307 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
/* Density = vectorized cost as a percentage of total loop cost.  */
5313 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5315 if (density_pct > DENSITY_PCT_THRESHOLD
5316 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
/* Inflate the body cost by DENSITY_PENALTY percent.  */
5318 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5319 if (dump_enabled_p ())
5320 dump_printf_loc (MSG_NOTE, vect_location,
5321 "density %d%%, cost %d exceeds threshold, penalizing "
5322 "loop body cost by %d%%", density_pct,
5323 vec_cost + not_vec_cost, DENSITY_PENALTY);
5327 /* Implement targetm.vectorize.init_cost. */
5329 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5330 instruction is needed by the vectorization. */
5331 static bool rs6000_vect_nonmem;
/* Allocates a fresh cost record for LOOP_INFO (NULL for basic-block SLP),
   zeroes all three cost buckets, and resets the nonmem flag.  The returned
   pointer is freed by rs6000_destroy_cost_data; the return statement is on
   a line not visible in this fragment.  */
5334 rs6000_init_cost (struct loop *loop_info)
5336 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5337 data->loop_info = loop_info;
5338 data->cost[vect_prologue] = 0;
5339 data->cost[vect_body] = 0;
5340 data->cost[vect_epilogue] = 0;
5341 rs6000_vect_nonmem = false;
5345 /* Implement targetm.vectorize.add_stmt_cost. */
/* Adds COUNT statements of kind KIND to the WHERE bucket of the cost record
   DATA, scaling each by the per-statement cost from
   rs6000_builtin_vectorization_cost.  Returns the amount added (retval).  */
5348 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5349 struct _stmt_vec_info *stmt_info, int misalign,
5350 enum vect_cost_model_location where)
5352 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5353 unsigned retval = 0;
5355 if (flag_vect_cost_model)
5357 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5358 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5360 /* Statements in an inner loop relative to the loop being
5361 vectorized are weighted more heavily. The value here is
5362 arbitrary and could potentially be improved with analysis. */
5363 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5364 count *= 50; /* FIXME. */
5366 retval = (unsigned) (count * stmt_cost);
5367 cost_data->cost[where] += retval;
5369 /* Check whether we're doing something other than just a copy loop.
5370 Not all such loops may be profitably vectorized; see
5371 rs6000_finish_cost. */
5372 if ((kind == vec_to_scalar || kind == vec_perm
5373 || kind == vec_promote_demote || kind == vec_construct
5374 || kind == scalar_to_vec)
5375 || (where == vect_body && kind == vector_stmt))
5376 rs6000_vect_nonmem = true;
5382 /* Implement targetm.vectorize.finish_cost. */
/* Applies final adjustments (density penalty, copy-loop-with-versioning
   penalty) and writes the three accumulated bucket totals through the
   output pointers.  */
5385 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5386 unsigned *body_cost, unsigned *epilogue_cost)
5388 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
/* loop_info is NULL for basic-block SLP; the density test only applies to
   loop vectorization.  */
5390 if (cost_data->loop_info)
5391 rs6000_density_test (cost_data);
5393 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5394 that require versioning for any reason. The vectorization is at
5395 best a wash inside the loop, and the versioning checks make
5396 profitability highly unlikely and potentially quite harmful. */
5397 if (cost_data->loop_info)
5399 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5400 if (!rs6000_vect_nonmem
5401 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5402 && LOOP_REQUIRES_VERSIONING (vec_info))
/* Large constant penalty effectively disables vectorization here.  */
5403 cost_data->cost[vect_body] += 10000;
5406 *prologue_cost = cost_data->cost[vect_prologue];
5407 *body_cost = cost_data->cost[vect_body];
5408 *epilogue_cost = cost_data->cost[vect_epilogue];
5411 /* Implement targetm.vectorize.destroy_cost_data. */
/* Releases the record allocated by rs6000_init_cost; the body (presumably a
   free of DATA -- confirm against the full source) is on lines not visible
   in this fragment.  */
5414 rs6000_destroy_cost_data (void *data)
5419 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5420 library with vectorized intrinsics. */
/* Builds (on the fly) an external FUNCTION_DECL for the MASS library routine
   corresponding to scalar builtin FN, e.g. pow -> powd2 / powf -> powf4.  */
5423 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5427 const char *suffix = NULL;
5428 tree fntype, new_fndecl, bdecl = NULL_TREE;
5431 machine_mode el_mode, in_mode;
5434 /* Libmass is suitable for unsafe math only as it does not correctly support
5435 parts of IEEE with the required precision such as denormals. Only support
5436 it if we have VSX to use the simd d2 or f4 functions.
5437 XXX: Add variable length support. */
5438 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5441 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5442 n = TYPE_VECTOR_SUBPARTS (type_out);
5443 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5444 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5445 if (el_mode != in_mode
/* Pick the scalar builtin and the MASS name suffix by element mode:
   "d2" for V2DF, "4" for V4SF.  */
5481 if (el_mode == DFmode && n == 2)
5483 bdecl = mathfn_built_in (double_type_node, fn);
5484 suffix = "d2"; /* pow -> powd2 */
5486 else if (el_mode == SFmode && n == 4)
5488 bdecl = mathfn_built_in (float_type_node, fn);
5489 suffix = "4"; /* powf -> powf4 */
5501 gcc_assert (suffix != NULL);
5502 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
/* Strip the "__builtin_" prefix from the builtin's name and append the
   MASS suffix to form the library symbol name.  */
5506 strcpy (name, bname + sizeof ("__builtin_") - 1);
5507 strcat (name, suffix);
5510 fntype = build_function_type_list (type_out, type_in, NULL);
5511 else if (n_args == 2)
5512 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5516 /* Build a function declaration for the vectorized function. */
5517 new_fndecl = build_decl (BUILTINS_LOCATION,
5518 FUNCTION_DECL, get_identifier (name), fntype);
5519 TREE_PUBLIC (new_fndecl) = 1;
5520 DECL_EXTERNAL (new_fndecl) = 1;
5521 DECL_IS_NOVOPS (new_fndecl) = 1;
5522 TREE_READONLY (new_fndecl) = 1;
5527 /* Returns a function decl for a vectorized version of the builtin function
5528 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5529 if it is not available. */
/* The switch over FN (case labels for COPYSIGN, CEIL, FLOOR, FMA, TRUNC,
   NEARBYINT, RINT, ...) falls on lines not visible in this fragment; each
   group below selects a VSX or Altivec builtin by element mode and count.  */
5532 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5535 machine_mode in_mode, out_mode;
5538 if (TARGET_DEBUG_BUILTIN)
5539 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5540 combined_fn_name (combined_fn (fn)),
5541 GET_MODE_NAME (TYPE_MODE (type_out)),
5542 GET_MODE_NAME (TYPE_MODE (type_in)));
5544 if (TREE_CODE (type_out) != VECTOR_TYPE
5545 || TREE_CODE (type_in) != VECTOR_TYPE)
5548 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5549 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5550 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5551 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* copysign group.  */
5556 if (VECTOR_UNIT_VSX_P (V2DFmode)
5557 && out_mode == DFmode && out_n == 2
5558 && in_mode == DFmode && in_n == 2)
5559 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5560 if (VECTOR_UNIT_VSX_P (V4SFmode)
5561 && out_mode == SFmode && out_n == 4
5562 && in_mode == SFmode && in_n == 4)
5563 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5564 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5565 && out_mode == SFmode && out_n == 4
5566 && in_mode == SFmode && in_n == 4)
5567 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
/* Round-toward-plus-infinity group (xvrdpip/xvrspip/vrfip).  */
5570 if (VECTOR_UNIT_VSX_P (V2DFmode)
5571 && out_mode == DFmode && out_n == 2
5572 && in_mode == DFmode && in_n == 2)
5573 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5574 if (VECTOR_UNIT_VSX_P (V4SFmode)
5575 && out_mode == SFmode && out_n == 4
5576 && in_mode == SFmode && in_n == 4)
5577 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5578 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5579 && out_mode == SFmode && out_n == 4
5580 && in_mode == SFmode && in_n == 4)
5581 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
/* Round-toward-minus-infinity group (xvrdpim/xvrspim/vrfim).  */
5584 if (VECTOR_UNIT_VSX_P (V2DFmode)
5585 && out_mode == DFmode && out_n == 2
5586 && in_mode == DFmode && in_n == 2)
5587 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5588 if (VECTOR_UNIT_VSX_P (V4SFmode)
5589 && out_mode == SFmode && out_n == 4
5590 && in_mode == SFmode && in_n == 4)
5591 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5592 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5593 && out_mode == SFmode && out_n == 4
5594 && in_mode == SFmode && in_n == 4)
5595 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
/* Fused multiply-add group (xvmadddp/xvmaddsp/vmaddfp).  */
5598 if (VECTOR_UNIT_VSX_P (V2DFmode)
5599 && out_mode == DFmode && out_n == 2
5600 && in_mode == DFmode && in_n == 2)
5601 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5602 if (VECTOR_UNIT_VSX_P (V4SFmode)
5603 && out_mode == SFmode && out_n == 4
5604 && in_mode == SFmode && in_n == 4)
5605 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5606 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5607 && out_mode == SFmode && out_n == 4
5608 && in_mode == SFmode && in_n == 4)
5609 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
/* Round-toward-zero group (xvrdpiz/xvrspiz/vrfiz).  */
5612 if (VECTOR_UNIT_VSX_P (V2DFmode)
5613 && out_mode == DFmode && out_n == 2
5614 && in_mode == DFmode && in_n == 2)
5615 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5616 if (VECTOR_UNIT_VSX_P (V4SFmode)
5617 && out_mode == SFmode && out_n == 4
5618 && in_mode == SFmode && in_n == 4)
5619 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5620 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5621 && out_mode == SFmode && out_n == 4
5622 && in_mode == SFmode && in_n == 4)
5623 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
/* Round-to-nearest group (xvrdpi/xvrspi) -- only valid under
   -funsafe-math-optimizations because the rounding mode differs.  */
5626 if (VECTOR_UNIT_VSX_P (V2DFmode)
5627 && flag_unsafe_math_optimizations
5628 && out_mode == DFmode && out_n == 2
5629 && in_mode == DFmode && in_n == 2)
5630 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5631 if (VECTOR_UNIT_VSX_P (V4SFmode)
5632 && flag_unsafe_math_optimizations
5633 && out_mode == SFmode && out_n == 4
5634 && in_mode == SFmode && in_n == 4)
5635 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
/* Round-using-current-mode group (xvrdpic/xvrspic) -- excluded when
   -ftrapping-math because these may not raise the expected exceptions.  */
5638 if (VECTOR_UNIT_VSX_P (V2DFmode)
5639 && !flag_trapping_math
5640 && out_mode == DFmode && out_n == 2
5641 && in_mode == DFmode && in_n == 2)
5642 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]
5643 if (VECTOR_UNIT_VSX_P (V4SFmode)
5644 && !flag_trapping_math
5645 && out_mode == SFmode && out_n == 4
5646 && in_mode == SFmode && in_n == 4)
5647 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5653 /* Generate calls to libmass if appropriate. */
5654 if (rs6000_veclib_handler)
5655 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5660 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
/* Like rs6000_builtin_vectorized_function, but for the machine-dependent
   (rs6000-specific) scalar builtins: rsqrt/rsqrtf and recip/recipf.  */
5663 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5666 machine_mode in_mode, out_mode;
5669 if (TARGET_DEBUG_BUILTIN)
5670 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5671 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5672 GET_MODE_NAME (TYPE_MODE (type_out)),
5673 GET_MODE_NAME (TYPE_MODE (type_in)));
5675 if (TREE_CODE (type_out) != VECTOR_TYPE
5676 || TREE_CODE (type_in) != VECTOR_TYPE)
5679 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5680 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5681 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5682 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5684 enum rs6000_builtins fn
5685 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5688 case RS6000_BUILTIN_RSQRTF:
5689 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5690 && out_mode == SFmode && out_n == 4
5691 && in_mode == SFmode && in_n == 4)
5692 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5694 case RS6000_BUILTIN_RSQRT:
5695 if (VECTOR_UNIT_VSX_P (V2DFmode)
5696 && out_mode == DFmode && out_n == 2
5697 && in_mode == DFmode && in_n == 2)
5698 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5700 case RS6000_BUILTIN_RECIPF:
5701 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5702 && out_mode == SFmode && out_n == 4
5703 && in_mode == SFmode && in_n == 4)
5704 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5706 case RS6000_BUILTIN_RECIP:
5707 if (VECTOR_UNIT_VSX_P (V2DFmode)
5708 && out_mode == DFmode && out_n == 2
5709 && in_mode == DFmode && in_n == 2)
5710 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5718 /* Default CPU string for rs6000*_file_start functions. */
5719 static const char *rs6000_default_cpu;
5721 /* Do anything needed at the start of the asm file. */
/* Emits a verbose-asm comment describing the effective CPU/tune/sdata
   options, then (on ELF) a ".machine" directive chosen from the strongest
   ISA feature flag that is enabled, and the ELFv2 ".abiversion 2".  */
5724 rs6000_file_start (void)
5727 const char *start = buffer;
5728 FILE *file = asm_out_file;
5730 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5732 default_file_start ();
5734 if (flag_verbose_asm)
5736 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5738 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5740 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5744 if (global_options_set.x_rs6000_cpu_index)
5746 fprintf (file, "%s -mcpu=%s", start,
5747 processor_target_table[rs6000_cpu_index].name);
5751 if (global_options_set.x_rs6000_tune_index)
5753 fprintf (file, "%s -mtune=%s", start,
5754 processor_target_table[rs6000_tune_index].name);
5758 if (PPC405_ERRATUM77)
5760 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5764 #ifdef USING_ELFOS_H
5765 switch (rs6000_sdata)
5767 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5768 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5769 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5770 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5773 if (rs6000_sdata && g_switch_value)
5775 fprintf (file, "%s -G %d", start,
5785 #ifdef USING_ELFOS_H
/* Only emit .machine when no explicit CPU was configured or given; the
   branches are ordered newest ISA first, so the first matching feature
   flag picks the directive.  */
5786 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5787 && !global_options_set.x_rs6000_cpu_index)
5789 fputs ("\t.machine ", asm_out_file);
5790 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5791 fputs ("power9\n", asm_out_file);
5792 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5793 fputs ("power8\n", asm_out_file);
5794 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5795 fputs ("power7\n", asm_out_file);
5796 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5797 fputs ("power6\n", asm_out_file);
5798 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5799 fputs ("power5\n", asm_out_file);
5800 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5801 fputs ("power4\n", asm_out_file);
5802 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5803 fputs ("ppc64\n", asm_out_file);
5805 fputs ("ppc\n", asm_out_file);
5809 if (DEFAULT_ABI == ABI_ELFv2)
5810 fprintf (file, "\t.abiversion 2\n");
5814 /* Return nonzero if this function is known to have a null epilogue. */
/* Only meaningful after reload, when the stack frame layout is final; the
   condition checks that no GPR/FPR/Altivec registers need restoring and
   neither LR, CR, nor VRSAVE was saved.  The tail of the condition and the
   return statements are on lines not visible in this fragment.  */
5817 direct_return (void)
5819 if (reload_completed)
5821 rs6000_stack_t *info = rs6000_stack_info ();
5823 if (info->first_gp_reg_save == 32
5824 && info->first_fp_reg_save == 64
5825 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5826 && ! info->lr_save_p
5827 && ! info->cr_save_p
5828 && info->vrsave_size == 0
5836 /* Helper for num_insns_constant. Calculate number of instructions to
5837 load VALUE to a single gpr using combinations of addi, addis, ori,
5838 oris and sldi instructions. */
5841 num_insns_constant_gpr (HOST_WIDE_INT value)
5843 /* signed constant loadable with addi */
/* Unsigned trick: VALUE is in [-0x8000, 0x7fff] iff VALUE + 0x8000 fits in
   16 bits as an unsigned quantity.  */
5844 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5847 /* constant loadable with addis */
5848 else if ((value & 0xffff) == 0
5849 && (value >> 31 == -1 || value >> 31 == 0))
/* 64-bit case: split into the sign-extended low 32 bits and the high part,
   recurse on each half, and add 1 for the sldi/or combining insn(s).  */
5852 else if (TARGET_POWERPC64)
5854 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5855 HOST_WIDE_INT high = value >> 31;
5857 if (high == 0 || high == -1)
5863 return num_insns_constant_gpr (high) + 1;
5865 return num_insns_constant_gpr (low) + 1;
5867 return (num_insns_constant_gpr (high)
5868 + num_insns_constant_gpr (low) + 1);
5875 /* Helper for num_insns_constant. Allow constants formed by the
5876 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5877 and handle modes that require multiple gprs. */
5880 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
/* NREGS = number of word-size GPRs MODE occupies, rounding up.  */
5882 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5886 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5887 int insns = num_insns_constant_gpr (low);
5889 /* We won't get more than 2 from num_insns_constant_gpr
5890 except when TARGET_POWERPC64 and mode is DImode or
5891 wider, so the register mode must be DImode. */
5892 && rs6000_is_valid_and_mask (GEN_INT (low), DImode)
5895 value >>= BITS_PER_WORD;
5900 /* Return the number of instructions it takes to form a constant in as
5901 many gprs are needed for MODE. */
5904 num_insns_constant (rtx op, machine_mode mode)
5908 switch (GET_CODE (op))
/* CONST_WIDE_INT: sum the cost of each HOST_WIDE_INT element.  */
5914 case CONST_WIDE_INT:
5917 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5918 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
/* CONST_DOUBLE: convert the FP constant to its target bit image and cost
   that image as an integer load.  */
5925 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5927 if (mode == SFmode || mode == SDmode)
5932 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5934 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5935 /* See the first define_split in rs6000.md handling a
5936 const_double_operand. */
5940 else if (mode == DFmode || mode == DDmode)
5945 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5947 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5949 /* See the second (32-bit) and third (64-bit) define_split
5950 in rs6000.md handling a const_double_operand. */
5951 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5952 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5955 else if (mode == TFmode || mode == TDmode
5956 || mode == KFmode || mode == IFmode)
5962 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5964 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
/* 128-bit constants: cost the two 64-bit halves independently, picking
   word order by endianness.  */
5966 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5967 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5968 insns = num_insns_constant_multi (val, DImode);
5969 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5970 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5971 insns += num_insns_constant_multi (val, DImode);
5983 return num_insns_constant_multi (val, mode);
5986 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5987 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5988 corresponding element of the vector, but for V4SFmode, the
5989 corresponding "float" is interpreted as an SImode integer. */
5992 const_vector_elt_as_int (rtx op, unsigned int elt)
5996 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5997 gcc_assert (GET_MODE (op) != V2DImode
5998 && GET_MODE (op) != V2DFmode);
6000 tmp = CONST_VECTOR_ELT (op, elt);
/* Reinterpret a float element's bit pattern as an SImode integer.  */
6001 if (GET_MODE (op) == V4SFmode)
6002 tmp = gen_lowpart (SImode, tmp);
6003 return INTVAL (tmp);
6006 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6007 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6008 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6009 all items are set to the same value and contain COPIES replicas of the
6010 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6011 operand and the others are set to the value of the operand's msb. */
6014 vspltis_constant (rtx op, unsigned step, unsigned copies)
6016 machine_mode mode = GET_MODE (op);
6017 machine_mode inner = GET_MODE_INNER (mode);
6025 HOST_WIDE_INT splat_val;
6026 HOST_WIDE_INT msb_val;
/* 64-bit and 128-bit element vectors are handled elsewhere.  */
6028 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6031 nunits = GET_MODE_NUNITS (mode);
6032 bitsize = GET_MODE_BITSIZE (inner);
6033 mask = GET_MODE_MASK (inner);
/* Start from the last (big-endian) or first (little-endian) element.  */
6035 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6037 msb_val = val >= 0 ? 0 : -1;
6039 /* Construct the value to be splatted, if possible. If not, return 0. */
/* Fold COPIES replicas down: each halving step must show the same
   sub-pattern duplicated in both halves.  */
6040 for (i = 2; i <= copies; i *= 2)
6042 HOST_WIDE_INT small_val;
6044 small_val = splat_val >> bitsize;
6046 if (splat_val != ((HOST_WIDE_INT)
6047 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6048 | (small_val & mask)))
6050 splat_val = small_val;
6053 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
/* vspltis[bhw] takes a 5-bit signed immediate, i.e. [-16, 15].  */
6054 if (EASY_VECTOR_15 (splat_val))
6057 /* Also check if we can splat, and then add the result to itself. Do so if
6058 the value is positive, of if the splat instruction is using OP's mode;
6059 for splat_val < 0, the splat and the add should use the same mode. */
6060 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6061 && (splat_val >= 0 || (step == 1 && copies == 1)))
6064 /* Also check if are loading up the most significant bit which can be done by
6065 loading up -1 and shifting the value left by -1. */
6066 else if (EASY_VECTOR_MSB (splat_val, inner))
6072 /* Check if VAL is present in every STEP-th element, and the
6073 other elements are filled with its most significant bit. */
6074 for (i = 1; i < nunits; ++i)
6076 HOST_WIDE_INT desired_val;
6077 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6078 if ((i & (step - 1)) == 0)
6081 desired_val = msb_val;
6083 if (desired_val != const_vector_elt_as_int (op, elt))
6090 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6091 instruction, filling in the bottom elements with 0 or -1.
6093 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6094 for the number of zeroes to shift in, or negative for the number of 0xff
6097 OP is a CONST_VECTOR. */
6100 vspltis_shifted (rtx op)
6102 machine_mode mode = GET_MODE (op);
6103 machine_mode inner = GET_MODE_INNER (mode);
/* Only byte/halfword/word element vectors can use the vsldoi trick.  */
6111 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6114 /* We need to create pseudo registers to do the shift, so don't recognize
6115 shift vector constants after reload. */
6116 if (!can_create_pseudo_p ())
6119 nunits = GET_MODE_NUNITS (mode);
6120 mask = GET_MODE_MASK (inner);
/* Leading element in memory order: first for BE, last for LE.  */
6122 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6124 /* Check if the value can really be the operand of a vspltis[bhw]. */
6125 if (EASY_VECTOR_15 (val))
6128 /* Also check if we are loading up the most significant bit which can be done
6129 by loading up -1 and shifting the value left by -1. */
6130 else if (EASY_VECTOR_MSB (val, inner))
6136 /* Check if VAL is present in every STEP-th element until we find elements
6137 that are 0 or all 1 bits. */
6138 for (i = 1; i < nunits; ++i)
6140 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6141 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6143 /* If the value isn't the splat value, check for the remaining elements
/* Zero tail: every remaining element must be 0; the shift count is the
   remaining byte length (positive return).  */
6149 for (j = i+1; j < nunits; ++j)
6151 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6152 if (const_vector_elt_as_int (op, elt2) != 0)
6156 return (nunits - i) * GET_MODE_SIZE (inner);
/* All-ones tail: every remaining element must be all 1 bits; negative
   return encodes the 0xff fill.  */
6159 else if ((elt_val & mask) == mask)
6161 for (j = i+1; j < nunits; ++j)
6163 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6164 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6168 return -((nunits - i) * GET_MODE_SIZE (inner));
6176 /* If all elements are equal, we don't need to do VLSDOI. */
6181 /* Return true if OP is of the given MODE and can be synthesized
6182 with a vspltisb, vspltish or vspltisw. */
6185 easy_altivec_constant (rtx op, machine_mode mode)
6187 unsigned step, copies;
6189 if (mode == VOIDmode)
6190 mode = GET_MODE (op);
6191 else if (mode != GET_MODE (op))
6194 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6196 if (mode == V2DFmode)
6197 return zero_constant (op, mode);
6199 else if (mode == V2DImode)
6201 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6202 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6205 if (zero_constant (op, mode))
6208 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6209 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6215 /* V1TImode is a special container for TImode. Ignore for now. */
6216 else if (mode == V1TImode)
6219 /* Start with a vspltisw. */
/* STEP starts as nunits/4 so one word-size splat element covers 4 bytes;
   the vspltish/vspltisb retries below adjust step/copies (the adjustments
   fall on lines not visible in this fragment).  */
6220 step = GET_MODE_NUNITS (mode) / 4;
6223 if (vspltis_constant (op, step, copies))
6226 /* Then try with a vspltish. */
6232 if (vspltis_constant (op, step, copies))
6235 /* And finally a vspltisb. */
6241 if (vspltis_constant (op, step, copies))
6244 if (vspltis_shifted (op) != 0)
6250 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6251 result is OP. Abort if it is not possible. */
6254 gen_easy_altivec_constant (rtx op)
6256 machine_mode mode = GET_MODE (op);
6257 int nunits = GET_MODE_NUNITS (mode);
/* The element to splat: last in BE element order, first in LE.  */
6258 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6259 unsigned step = nunits / 4;
6260 unsigned copies = 1;
6262 /* Start with a vspltisw. */
6263 if (vspltis_constant (op, step, copies))
6264 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6266 /* Then try with a vspltish. */
6272 if (vspltis_constant (op, step, copies))
6273 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6275 /* And finally a vspltisb. */
6281 if (vspltis_constant (op, step, copies))
6282 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6287 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6288 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6290 Return the number of instructions needed (1 or 2) into the address pointed
6293 Return the constant that is being split via CONSTANT_PTR. */
6296 xxspltib_constant_p (rtx op,
6301 size_t nunits = GET_MODE_NUNITS (mode);
6303 HOST_WIDE_INT value;
6306 /* Set the returned values to out of bound values. */
6307 *num_insns_ptr = -1;
6308 *constant_ptr = 256;
/* xxspltib is an ISA 3.0 (POWER9) instruction.  */
6310 if (!TARGET_P9_VECTOR)
6313 if (mode == VOIDmode)
6314 mode = GET_MODE (op);
6316 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6319 /* Handle (vec_duplicate <constant>). */
6320 if (GET_CODE (op) == VEC_DUPLICATE)
6322 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6323 && mode != V2DImode)
6326 element = XEXP (op, 0);
6327 if (!CONST_INT_P (element))
6330 value = INTVAL (element);
/* xxspltib's immediate is a signed byte.  */
6331 if (!IN_RANGE (value, -128, 127))
6335 /* Handle (const_vector [...]). */
6336 else if (GET_CODE (op) == CONST_VECTOR)
6338 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6339 && mode != V2DImode)
6342 element = CONST_VECTOR_ELT (op, 0);
6343 if (!CONST_INT_P (element))
6346 value = INTVAL (element);
6347 if (!IN_RANGE (value, -128, 127))
/* All elements must equal the first for a splat.  */
6350 for (i = 1; i < nunits; i++)
6352 element = CONST_VECTOR_ELT (op, i);
6353 if (!CONST_INT_P (element))
6356 if (value != INTVAL (element))
6361 /* Handle integer constants being loaded into the upper part of the VSX
6362 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6363 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6364 else if (CONST_INT_P (op))
6366 if (!SCALAR_INT_MODE_P (mode))
6369 value = INTVAL (op);
6370 if (!IN_RANGE (value, -128, 127))
6373 if (!IN_RANGE (value, -1, 0))
6375 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6378 if (EASY_VECTOR_15 (value))
6386 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6387 sign extend. Special case 0/-1 to allow getting any VSX register instead
6388 of an Altivec register. */
6389 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6390 && EASY_VECTOR_15 (value))
6393 /* Return # of instructions and the constant byte for XXSPLTIB. */
/* V16QI needs only the xxspltib itself; other element widths need a
   follow-on sign-extension insn unless the value is 0/-1 (the *num_insns_ptr
   assignments fall on lines not visible in this fragment).  */
6394 if (mode == V16QImode)
6397 else if (IN_RANGE (value, -1, 0))
6403 *constant_ptr = (int) value;
/* Emit the assembler template for moving easy vector constant OPERANDS[1]
   into register OPERANDS[0].  Returns a template string; prefers xxspltib
   on POWER9, then VMX vspltisw, then VSX logical-op idioms for 0/-1.  */
6408 output_vec_const_move (rtx *operands)
6416 mode = GET_MODE (dest);
6420 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
/* 256 is out of range for the signed-byte xxspltib immediate, marking
   "no xxspltib value found yet" (matches xxspltib_constant_p's sentinel).  */
6421 int xxspltib_value = 256;
6424 if (zero_constant (vec, mode))
6426 if (TARGET_P9_VECTOR)
6427 return "xxspltib %x0,0";
6429 else if (dest_vmx_p)
6430 return "vspltisw %0,0";
/* xxlxor reg,reg,reg is the canonical VSX zero idiom.  */
6433 return "xxlxor %x0,%x0,%x0";
6436 if (all_ones_constant (vec, mode))
6438 if (TARGET_P9_VECTOR)
6439 return "xxspltib %x0,255";
6441 else if (dest_vmx_p)
6442 return "vspltisw %0,-1";
/* xxlorc reg,reg,reg yields all-ones on ISA 2.07.  */
6444 else if (TARGET_P8_VECTOR)
6445 return "xxlorc %x0,%x0,%x0";
6451 if (TARGET_P9_VECTOR
6452 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6456 operands[2] = GEN_INT (xxspltib_value & 0xff);
6457 return "xxspltib %x0,%2";
/* Altivec-only path below: dest must be a VMX register.  */
6468 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6469 if (zero_constant (vec, mode))
6470 return "vspltisw %0,0";
6472 if (all_ones_constant (vec, mode))
6473 return "vspltisw %0,-1";
6475 /* Do we need to construct a value using VSLDOI? */
6476 shift = vspltis_shifted (vec);
6480 splat_vec = gen_easy_altivec_constant (vec);
6481 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6482 operands[1] = XEXP (splat_vec, 0);
6483 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
/* Pick the splat insn matching the VEC_DUPLICATE's element width.  */
6486 switch (GET_MODE (splat_vec))
6489 return "vspltisw %0,%1";
6492 return "vspltish %0,%1";
6495 return "vspltisb %0,%1";
6505 /* Initialize vector TARGET to VALS. */
/* Expand an arbitrary vector-initializer: classify the elements
   (constant vs variable, all-same vs mixed), then pick the cheapest
   expansion — constant pool, splat, direct-move sequences, or a
   build-in-memory fallback.  NOTE(review): intermediate lines are
   elided in this listing.  */
6508 rs6000_expand_vector_init (rtx target, rtx vals)
6510 machine_mode mode = GET_MODE (target);
6511 machine_mode inner_mode = GET_MODE_INNER (mode);
6512 int n_elts = GET_MODE_NUNITS (mode);
6513 int n_var = 0, one_var = -1;
6514 bool all_same = true, all_const_zero = true;
/* Scan the elements: count non-constant ones (remembering the last in
   ONE_VAR), and track whether all are zero / all identical.  */
6518 for (i = 0; i < n_elts; ++i)
6520 x = XVECEXP (vals, 0, i);
6521 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6522 ++n_var, one_var = i;
6523 else if (x != CONST0_RTX (inner_mode))
6524 all_const_zero = false;
6526 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
/* Fully-constant vector: zero, easy splat immediate, or pool load.  */
6532 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6533 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6534 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6536 /* Zero register. */
6537 emit_move_insn (target, CONST0_RTX (mode));
6540 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6542 /* Splat immediate. */
6543 emit_insn (gen_rtx_SET (target, const_vec));
6548 /* Load from constant pool. */
6549 emit_move_insn (target, const_vec);
6554 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6555 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6559 size_t num_elements = all_same ? 1 : 2;
6560 for (i = 0; i < num_elements; i++)
6562 op[i] = XVECEXP (vals, 0, i);
6563 /* Just in case there is a SUBREG with a smaller mode, do a
6565 if (GET_MODE (op[i]) != inner_mode)
6567 rtx tmp = gen_reg_rtx (inner_mode);
6568 convert_move (tmp, op[i], 0);
6571 /* Allow load with splat double word. */
6572 else if (MEM_P (op[i]))
6575 op[i] = force_reg (inner_mode, op[i]);
6577 else if (!REG_P (op[i]))
6578 op[i] = force_reg (inner_mode, op[i]);
/* Splat (all elements identical) or concatenate the two doublewords.  */
6583 if (mode == V2DFmode)
6584 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6586 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6590 if (mode == V2DFmode)
6591 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6593 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6598 /* Special case initializing vector int if we are on 64-bit systems with
6599 direct move or we have the ISA 3.0 instructions. */
6600 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6601 && TARGET_DIRECT_MOVE_64BIT)
6605 rtx element0 = XVECEXP (vals, 0, 0);
6606 if (MEM_P (element0))
6607 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6609 element0 = force_reg (SImode, element0);
6611 if (TARGET_P9_VECTOR)
6612 emit_insn (gen_vsx_splat_v4si (target, element0));
/* Pre-P9: zero-extend to DImode and use the DI-based splat pattern.  */
6615 rtx tmp = gen_reg_rtx (DImode);
6616 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6617 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
/* Non-splat V4SI: build from four SImode registers.  */
6626 for (i = 0; i < 4; i++)
6627 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6629 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6630 elements[2], elements[3]));
6635 /* With single precision floating point on VSX, know that internally single
6636 precision is actually represented as a double, and either make 2 V2DF
6637 vectors, and convert these vectors to single precision, or do one
6638 conversion, and splat the result to the other elements. */
6639 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6643 rtx element0 = XVECEXP (vals, 0, 0);
6645 if (TARGET_P9_VECTOR)
6647 if (MEM_P (element0))
6648 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6650 emit_insn (gen_vsx_splat_v4sf (target, element0));
/* Pre-P9 splat: convert the scalar and replicate it with XXSPLTW.  */
6655 rtx freg = gen_reg_rtx (V4SFmode);
6656 rtx sreg = force_reg (SFmode, element0);
6657 rtx cvt = (TARGET_XSCVDPSPN
6658 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6659 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6662 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
/* Non-splat V4SF: pair the scalars into V2DF vectors, convert both to
   single precision, then interleave the results.  */
6668 rtx dbl_even = gen_reg_rtx (V2DFmode);
6669 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6670 rtx flt_even = gen_reg_rtx (V4SFmode);
6671 rtx flt_odd = gen_reg_rtx (V4SFmode);
6672 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6673 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6674 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6675 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6677 /* Use VMRGEW if we can instead of doing a permute. */
6678 if (TARGET_P8_VECTOR)
6680 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6681 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6682 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6683 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6684 if (BYTES_BIG_ENDIAN)
6685 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6687 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6691 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6692 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6693 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6694 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6695 rs6000_expand_extract_even (target, flt_even, flt_odd);
6701 /* Special case initializing vector short/char that are splats if we are on
6702 64-bit systems with direct move. */
6703 if (all_same && TARGET_DIRECT_MOVE_64BIT
6704 && (mode == V16QImode || mode == V8HImode))
6706 rtx op0 = XVECEXP (vals, 0, 0);
6707 rtx di_tmp = gen_reg_rtx (DImode);
6710 op0 = force_reg (GET_MODE_INNER (mode), op0);
6712 if (mode == V16QImode)
6714 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6715 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6719 if (mode == V8HImode)
6721 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6722 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6727 /* Store value to stack temp. Load vector element. Splat. However, splat
6728 of 64-bit items is not supported on Altivec. */
6729 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6731 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6732 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6733 XVECEXP (vals, 0, 0));
6734 x = gen_rtx_UNSPEC (VOIDmode,
6735 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6736 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6738 gen_rtx_SET (target, mem),
6740 x = gen_rtx_VEC_SELECT (inner_mode, target,
6741 gen_rtx_PARALLEL (VOIDmode,
6742 gen_rtvec (1, const0_rtx)));
6743 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6747 /* One field is non-constant. Load constant then overwrite
6751 rtx copy = copy_rtx (vals);
6753 /* Load constant part of vector, substitute neighboring value for
6755 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6756 rs6000_expand_vector_init (target, copy);
6758 /* Insert variable. */
6759 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6763 /* Construct the vector in memory one field at a time
6764 and load the whole vector. */
6765 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6766 for (i = 0; i < n_elts; i++)
6767 emit_move_insn (adjust_address_nv (mem, inner_mode,
6768 i * GET_MODE_SIZE (inner_mode)),
6769 XVECEXP (vals, 0, i));
6770 emit_move_insn (target, mem);
6773 /* Set field ELT of TARGET to VAL. */
/* Expand a vector element store.  Uses direct VSX "set" patterns when
   available, a lowpart move for single-element vectors, and otherwise a
   store/load + VPERM insert sequence.  NOTE(review): intermediate lines
   are elided in this listing.  */
6776 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6778 machine_mode mode = GET_MODE (target);
6779 machine_mode inner_mode = GET_MODE_INNER (mode);
6780 rtx reg = gen_reg_rtx (mode);
6782 int width = GET_MODE_SIZE (inner_mode);
6785 val = force_reg (GET_MODE (val), val);
/* Fast path: dedicated vsx_set_* patterns (V2DF/V2DI always; other
   modes need ISA 3.0 + 64-bit).  */
6787 if (VECTOR_MEM_VSX_P (mode))
6789 rtx insn = NULL_RTX;
6790 rtx elt_rtx = GEN_INT (elt);
6792 if (mode == V2DFmode)
6793 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6795 else if (mode == V2DImode)
6796 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6798 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6800 if (mode == V4SImode)
6801 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6802 else if (mode == V8HImode)
6803 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6804 else if (mode == V16QImode)
6805 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6806 else if (mode == V4SFmode)
6807 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6817 /* Simplify setting single element vectors like V1TImode. */
6818 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6820 emit_move_insn (target, gen_lowpart (mode, val));
6824 /* Load single variable value. */
6825 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6826 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6827 x = gen_rtx_UNSPEC (VOIDmode,
6828 gen_rtvec (1, const0_rtx), UNSPEC_LVE)
6829 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6831 gen_rtx_SET (reg, mem),
6834 /* Linear sequence. */
6835 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6836 for (i = 0; i < 16; ++i)
6837 XVECEXP (mask, 0, i) = GEN_INT (i);
6839 /* Set permute mask to insert element into target. */
6840 for (i = 0; i < width; ++i)
6841 XVECEXP (mask, 0, elt*width + i)
/* Selector bytes 0x10..0x1f pick bytes from the second VPERM input.  */
6842 = GEN_INT (i + 0x10);
6843 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6845 if (BYTES_BIG_ENDIAN)
6846 x = gen_rtx_UNSPEC (mode,
6847 gen_rtvec (3, target, reg,
6848 force_reg (V16QImode, x)),
/* Little-endian: P9 can use the LE-ordered permute directly ...  */
6852 if (TARGET_P9_VECTOR)
6853 x = gen_rtx_UNSPEC (mode,
6854 gen_rtvec (3, reg, target,
6855 force_reg (V16QImode, x)),
/* ... otherwise emulate it by inverting the selector.  */
6859 /* Invert selector. We prefer to generate VNAND on P8 so
6860 that future fusion opportunities can kick in, but must
6861 generate VNOR elsewhere. */
6862 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6863 rtx iorx = (TARGET_P8_VECTOR
6864 ? gen_rtx_IOR (V16QImode, notx, notx)
6865 : gen_rtx_AND (V16QImode, notx, notx));
6866 rtx tmp = gen_reg_rtx (V16QImode);
6867 emit_insn (gen_rtx_SET (tmp, iorx));
6869 /* Permute with operands reversed and adjusted selector. */
6870 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6875 emit_insn (gen_rtx_SET (target, x));
6878 /* Extract field ELT from VEC into TARGET. */
/* Expand a vector element extract.  Handles a constant ELT via the
   per-mode vsx_extract_* patterns, a variable ELT via the *_var
   patterns (needs 64-bit direct move), and falls back to spilling the
   vector to the stack and loading one element.  NOTE(review):
   intermediate lines are elided in this listing.  */
6881 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6883 machine_mode mode = GET_MODE (vec);
6884 machine_mode inner_mode = GET_MODE_INNER (mode);
6887 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
/* Single-element (V1TImode) case: only element 0 exists.  */
6894 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
6895 emit_move_insn (target, gen_lowpart (TImode, vec));
6898 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6901 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6904 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
/* Byte/half/word extracts need 64-bit direct move support.  */
6907 if (TARGET_DIRECT_MOVE_64BIT)
6909 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6915 if (TARGET_DIRECT_MOVE_64BIT)
6917 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6923 if (TARGET_DIRECT_MOVE_64BIT)
6925 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
/* Variable element number: normalize ELT to a DImode register first.  */
6931 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6932 && TARGET_DIRECT_MOVE_64BIT)
6934 if (GET_MODE (elt) != DImode)
6936 rtx tmp = gen_reg_rtx (DImode);
6937 convert_move (tmp, elt, 0);
6940 else if (!REG_P (elt))
6941 elt = force_reg (DImode, elt);
6946 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6950 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6954 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6958 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6962 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6966 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
/* Memory fallback — only reachable with a constant element number.  */
6974 gcc_assert (CONST_INT_P (elt));
6976 /* Allocate mode-sized buffer. */
6977 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6979 emit_move_insn (mem, vec);
6981 /* Add offset to field within buffer matching vector element. */
6982 mem = adjust_address_nv (mem, inner_mode,
6983 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
6985 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6988 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6989 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6990 temporary (BASE_TMP) to fixup the address. Return the new memory address
6991 that is valid for reads or writes to a given register (SCALAR_REG). */
/* NOTE(review): intermediate lines are elided in this listing.  */
6994 rs6000_adjust_vec_address (rtx scalar_reg,
6998 machine_mode scalar_mode)
7000 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7001 rtx addr = XEXP (mem, 0);
7006 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7007 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7009 /* Calculate what we need to add to the address to get the element
7011 if (CONST_INT_P (element))
7012 element_offset = GEN_INT (INTVAL (element) * scalar_size);
/* Variable element: shift it left by log2(element size) at runtime.  */
7015 int byte_shift = exact_log2 (scalar_size);
7016 gcc_assert (byte_shift >= 0);
7018 if (byte_shift == 0)
7019 element_offset = element;
7023 if (TARGET_POWERPC64)
7024 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7026 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7028 element_offset = base_tmp;
7032 /* Create the new address pointing to the element within the vector. If we
7033 are adding 0, we don't have to change the address. */
7034 if (element_offset == const0_rtx)
7037 /* A simple indirect address can be converted into a reg + offset
7039 else if (REG_P (addr) || SUBREG_P (addr))
7040 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7042 /* Optimize D-FORM addresses with constant offset with a constant element, to
7043 include the element offset in the address directly. */
7044 else if (GET_CODE (addr) == PLUS)
7046 rtx op0 = XEXP (addr, 0);
7047 rtx op1 = XEXP (addr, 1);
7050 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7051 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7053 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7054 rtx offset_rtx = GEN_INT (offset);
/* Use the folded constant if it fits a 16-bit D-form displacement
   (and is word-aligned when an 8-byte DS-form would be needed).  */
7056 if (IN_RANGE (offset, -32768, 32767)
7057 && (scalar_size < 8 || (offset & 0x3) == 0))
7058 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx)
7061 emit_move_insn (base_tmp, offset_rtx);
7062 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7067 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7068 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7070 /* Note, ADDI requires the register being added to be a base
7071 register. If the register was R0, load it up into the temporary
7074 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7076 insn = gen_add3_insn (base_tmp, op1, element_offset);
7077 gcc_assert (insn != NULL_RTX);
7082 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7084 insn = gen_add3_insn (base_tmp, element_offset, op1);
7085 gcc_assert (insn != NULL_RTX);
7091 emit_move_insn (base_tmp, op1);
7092 emit_insn (gen_add2_insn (base_tmp, element_offset));
7095 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
/* Anything else (e.g. a symbolic address): force it into BASE_TMP.  */
7101 emit_move_insn (base_tmp, addr);
7102 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7105 /* If we have a PLUS, we need to see whether the particular register class
7106 allows for D-FORM or X-FORM addressing. */
7107 if (GET_CODE (new_addr) == PLUS)
7109 rtx op1 = XEXP (new_addr, 1);
7110 addr_mask_type addr_mask;
7111 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
7113 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
7114 if (INT_REGNO_P (scalar_regno))
7115 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7117 else if (FP_REGNO_P (scalar_regno))
7118 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7120 else if (ALTIVEC_REGNO_P (scalar_regno))
7121 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7126 if (REG_P (op1) || SUBREG_P (op1))
7127 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7129 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7132 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7133 valid_addr_p = true;
7136 valid_addr_p = false;
/* Invalid for the target register class: compute into BASE_TMP and
   use a plain indirect address instead.  */
7140 emit_move_insn (base_tmp, new_addr);
7141 new_addr = base_tmp;
7144 return change_address (mem, scalar_mode, new_addr);
7147 /* Split a variable vec_extract operation into the component instructions. */
/* DEST receives element ELEMENT of vector SRC; TMP_GPR and TMP_ALTIVEC
   are scratch registers (GPR and Altivec respectively) used by the
   generated sequences.  Three strategies: load directly from memory,
   ISA 3.0 VEXTU*X extracts to a GPR, or a VSLO shift of the whole
   vector.  NOTE(review): intermediate lines are elided in this
   listing.  */
7150 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7153 machine_mode mode = GET_MODE (src);
7154 machine_mode scalar_mode = GET_MODE (dest);
7155 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7156 int byte_shift = exact_log2 (scalar_size);
7158 gcc_assert (byte_shift >= 0);
7160 /* If we are given a memory address, optimize to load just the element. We
7161 don't have to adjust the vector element number on little endian
7165 gcc_assert (REG_P (tmp_gpr));
7166 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7167 tmp_gpr, scalar_mode));
7171 else if (REG_P (src) || SUBREG_P (src))
7173 int bit_shift = byte_shift + 3;
7175 unsigned int dest_regno = reg_or_subregno (dest);
7176 unsigned int src_regno = reg_or_subregno (src);
7177 unsigned int element_regno = reg_or_subregno (element);
7179 gcc_assert (REG_P (tmp_gpr));
7181 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7182 a general purpose register. */
7183 if (TARGET_P9_VECTOR
7184 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7185 && INT_REGNO_P (dest_regno)
7186 && ALTIVEC_REGNO_P (src_regno)
7187 && INT_REGNO_P (element_regno))
7189 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7190 rtx element_si = gen_rtx_REG (SImode, element_regno);
7192 if (mode == V16QImode)
7193 emit_insn (BYTES_BIG_ENDIAN
7194 ? gen_vextublx (dest_si, element_si, src)
7195 : gen_vextubrx (dest_si, element_si, src));
/* VEXTUH*X/VEXTUW*X take a byte index, so scale the element number
   by the element size (shift left 1 for halfwords, 2 for words).  */
7197 else if (mode == V8HImode)
7199 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7200 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7201 emit_insn (BYTES_BIG_ENDIAN
7202 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7203 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7209 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7210 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7211 emit_insn (BYTES_BIG_ENDIAN
7212 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7213 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7220 gcc_assert (REG_P (tmp_altivec));
7222 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7223 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7224 will shift the element into the upper position (adding 3 to convert a
7225 byte shift into a bit shift). */
7226 if (scalar_size == 8)
7228 if (!BYTES_BIG_ENDIAN)
7230 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7236 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7238 emit_insn (gen_rtx_SET (tmp_gpr,
7239 gen_rtx_AND (DImode,
7240 gen_rtx_ASHIFT (DImode,
7247 if (!BYTES_BIG_ENDIAN)
7249 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7251 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7252 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7258 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7261 /* Get the value into the lower byte of the Altivec register where VSLO
7263 if (TARGET_P9_VECTOR)
7264 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr))
7265 else if (can_create_pseudo_p ())
7266 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7269 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7270 emit_move_insn (tmp_di, tmp_gpr);
7271 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7274 /* Do the VSLO to get the value into the final location. */
7278 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7282 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
/* V4SFmode: shift into place, then convert the top double back to SF.  */
7287 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7288 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7289 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7290 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7293 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
/* Integer modes: move to a GPR and arithmetic-shift the element down
   into the low bits (shift = 64 - 8*size).  */
7301 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7302 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7303 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7304 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7306 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7307 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7308 GEN_INT (64 - (8 * scalar_size))));
7322 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7323 selects whether the alignment is abi mandated, optional, or
7324 both abi and optional alignment. */
7327 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
/* ABI-mandated: vector types require at least 128-bit alignment.  */
7329 if (how != align_opt)
7331 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
/* Optional (performance): align char arrays to a word so block moves
   and string ops can use full-word accesses.  */
7335 if (how != align_abi)
7337 if (TREE_CODE (type) == ARRAY_TYPE
7338 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7340 if (align < BITS_PER_WORD)
7341 align = BITS_PER_WORD;
7348 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7349 instructions simply ignore the low bits; VSX memory instructions
7350 are aligned to 4 or 8 bytes. */
/* True when an unaligned access of MODE at alignment ALIGN (in bits)
   should be considered slow: always under -mstrict-align, and without
   efficient unaligned VSX for under-aligned scalar floats or vectors.  */
7353 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7355 return (STRICT_ALIGNMENT
7356 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7357 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7358 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7359 && (int) align < VECTOR_ALIGN (mode)))));
7362 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK-style field-align hook: warn
   (once, under -Wpsabi) when a vector field's layout changed relative
   to pre-GCC-5 releases; current GCC applies no special case.  */
7365 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7367 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7369 if (computed != 128)
/* `warned` is static — diagnose only once per compilation; presumably
   declared on an elided line.  */
7372 if (!warned && warn_psabi)
7375 inform (input_location,
7376 "the layout of aggregates containing vectors with"
7377 " %d-byte alignment has changed in GCC 5",
7378 computed / BITS_PER_UNIT);
7381 /* In current GCC there is no special case. */
7388 /* AIX increases natural record alignment to doubleword if the first
7389 field is an FP double while the FP fields remain word aligned. */
7392 rs6000_special_round_type_align (tree type, unsigned int computed,
7393 unsigned int specified)
7395 unsigned int align = MAX (computed, specified);
7396 tree field = TYPE_FIELDS (type);
7398 /* Skip all non field decls */
7399 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7400 field = DECL_CHAIN (field);
7402 if (field != NULL && field != type)
/* Look through arrays to the element type; a leading double (or array
   of doubles) promotes the record alignment to 64 bits.  */
7404 type = TREE_TYPE (field);
7405 while (TREE_CODE (type) == ARRAY_TYPE)
7406 type = TREE_TYPE (type);
7408 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7409 align = MAX (align, 64);
7415 /* Darwin increases record alignment to the natural alignment of
7419 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7420 unsigned int specified)
7422 unsigned int align = MAX (computed, specified);
/* Packed records keep their computed alignment unchanged.  */
7424 if (TYPE_PACKED (type))
7427 /* Find the first field, looking down into aggregates. */
7429 tree field = TYPE_FIELDS (type);
7430 /* Skip all non field decls */
7431 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7432 field = DECL_CHAIN (field);
7435 /* A packed field does not contribute any extra alignment. */
7436 if (DECL_PACKED (field))
7438 type = TREE_TYPE (field);
7439 while (TREE_CODE (type) == ARRAY_TYPE)
7440 type = TREE_TYPE (type);
7441 } while (AGGREGATE_TYPE_P (type));
/* Raise to the natural alignment of the first scalar field found.  */
7443 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7444 align = MAX (align, TYPE_ALIGN (type));
7449 /* Return 1 for an operand in small memory on V.4/eabi. */
/* OP must be a SYMBOL_REF (or CONST of symbol + small positive offset)
   referring to the small-data area; only meaningful for the V.4 ABI
   with -msdata enabled.  */
7452 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7453 machine_mode mode ATTRIBUTE_UNUSED)
7458 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7461 if (DEFAULT_ABI != ABI_V4)
7464 if (SYMBOL_REF_P (op))
/* Only (const (plus (symbol_ref) (const_int))) is acceptable beyond a
   bare symbol.  */
7467 else if (GET_CODE (op) != CONST
7468 || GET_CODE (XEXP (op, 0)) != PLUS
7469 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7470 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7475 rtx sum = XEXP (op, 0);
7476 HOST_WIDE_INT summand;
7478 /* We have to be careful here, because it is the referenced address
7479 that must be 32k from _SDA_BASE_, not just the symbol. */
7480 summand = INTVAL (XEXP (sum, 1));
7481 if (summand < 0 || summand > g_switch_value)
7484 sym_ref = XEXP (sum, 0);
7487 return SYMBOL_REF_SMALL_P (sym_ref);
7493 /* Return true if either operand is a general purpose register. */
7496 gpr_or_gpr_p (rtx op0, rtx op1)
7498 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7499 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7502 /* Return true if this is a move direct operation between GPR registers and
7503 floating point/VSX registers. */
7506 direct_move_p (rtx op0, rtx op1)
7510 if (!REG_P (op0) || !REG_P (op1))
7513 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7516 regno0 = REGNO (op0);
7517 regno1 = REGNO (op1);
/* Pseudos (non-hard registers) are never direct moves.  */
7518 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
/* GPR destination: source may be VSX (mtvsrd-style) or FPR (mffgpr).  */
7521 if (INT_REGNO_P (regno0))
7522 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7524 else if (INT_REGNO_P (regno1))
7526 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7529 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7536 /* Return true if the OFFSET is valid for the quad address instructions that
7537 use d-form (register + offset) addressing. */
7540 quad_address_offset_p (HOST_WIDE_INT offset)
/* DQ-form displacements are signed 16-bit and 16-byte aligned.  */
7542 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
7545 /* Return true if the ADDR is an acceptable address for a quad memory
7546 operation of mode MODE (either LQ/STQ for general purpose registers, or
7547 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7548 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7549 3.0 LXV/STXV instruction. */
/* NOTE(review): the comment mentions GPR_P but the visible signature
   takes STRICT — presumably updated at some point; confirm against the
   full source.  */
7552 quad_address_p (rtx addr, machine_mode mode, bool strict)
7556 if (GET_MODE_SIZE (mode) != 16)
/* A bare base register is always acceptable.  */
7559 if (legitimate_indirect_address_p (addr, strict))
7562 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
/* Otherwise require reg + 16-bit, 16-byte-aligned constant offset.  */
7565 if (GET_CODE (addr) != PLUS)
7568 op0 = XEXP (addr, 0);
7569 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7572 op1 = XEXP (addr, 1);
7573 if (!CONST_INT_P (op1))
7576 return quad_address_offset_p (INTVAL (op1));
7579 /* Return true if this is a load or store quad operation. This function does
7580 not handle the atomic quad memory instructions. */
7583 quad_load_store_p (rtx op0, rtx op1)
7587 if (!TARGET_QUAD_MEMORY)
/* Load: destination is an even GPR pair, source a quad-aligned MEM that
   does not overlap the destination registers.  */
7590 else if (REG_P (op0) && MEM_P (op1))
7591 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7592 && quad_memory_operand (op1, GET_MODE (op1))
7593 && !reg_overlap_mentioned_p (op0, op1));
/* Store: mirror image, no overlap check needed.  */
7595 else if (MEM_P (op0) && REG_P (op1))
7596 ret = (quad_memory_operand (op0, GET_MODE (op0))
7597 && quad_int_reg_operand (op1, GET_MODE (op1)));
/* Trace the decision under -mdebug=addr.  */
7602 if (TARGET_DEBUG_ADDR)
7604 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7605 ret ? "true" : "false");
7606 debug_rtx (gen_rtx_SET (op0, op1));
7612 /* Given an address, return a constant offset term if one exists. */
/* Strips auto-increment and lo_sum/pre_modify wrappers, looks through
   CONST and PLUS, and returns the CONST_INT part (elided lines
   presumably return NULL_RTX when no constant term is found).  */
7615 address_offset (rtx op)
7617 if (GET_CODE (op) == PRE_INC
7618 || GET_CODE (op) == PRE_DEC)
7620 else if (GET_CODE (op) == PRE_MODIFY
7621 || GET_CODE (op) == LO_SUM)
7624 if (GET_CODE (op) == CONST)
7627 if (GET_CODE (op) == PLUS)
7630 if (CONST_INT_P (op))
7636 /* Return true if the MEM operand is a memory operand suitable for use
7637 with a (full width, possibly multiple) gpr load/store. On
7638 powerpc64 this means the offset must be divisible by 4.
7639 Implements 'Y' constraint.
7641 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7642 a constraint function we know the operand has satisfied a suitable
7643 memory predicate. Also accept some odd rtl generated by reload
7644 (see rs6000_legitimize_reload_address for various forms). It is
7645 important that reload rtl be accepted by appropriate constraints
7646 but not by the operand predicate.
7648 Offsetting a lo_sum should not be allowed, except where we know by
7649 alignment that a 32k boundary is not crossed, but see the ???
7650 comment in rs6000_legitimize_reload_address. Note that by
7651 "offsetting" here we mean a further offset to access parts of the
7652 MEM. It's fine to have a lo_sum where the inner address is offset
7653 from a sym, since the same sym+offset will appear in the high part
7654 of the address calculation. */
7657 mem_operand_gpr (rtx op, machine_mode mode)
7659 unsigned HOST_WIDE_INT offset;
7661 rtx addr = XEXP (op, 0);
7663 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7665 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7666 && mode_supports_pre_incdec_p (mode)
7667 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7670 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7671 if (!rs6000_offsettable_memref_p (op, mode, false))
7674 op = address_offset (addr);
7678 offset = INTVAL (op);
/* DS-form (ld/std) instructions require the low 2 offset bits clear.  */
7679 if (TARGET_POWERPC64 && (offset & 3) != 0)
/* EXTRA accounts for multi-word accesses: the last word accessed must
   also be within the signed-16-bit displacement range.  */
7682 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7686 if (GET_CODE (addr) == LO_SUM)
7687 /* For lo_sum addresses, we must allow any offset except one that
7688 causes a wrap, so test only the low 16 bits. */
7689 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
/* Unsigned trick: offset in [-0x8000, 0x8000 - extra).  */
7691 return offset + 0x8000 < 0x10000u - extra;
7694 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7695 enforce an offset divisible by 4 even for 32-bit. */
7698 mem_operand_ds_form (rtx op, machine_mode mode)
7700 unsigned HOST_WIDE_INT offset;
7702 rtx addr = XEXP (op, 0);
7704 if (!offsettable_address_p (false, mode, addr))
7707 op = address_offset (addr);
7711 offset = INTVAL (op);
/* DS-form requires the low 2 bits of the displacement to be zero.  */
7712 if ((offset & 3) != 0)
7715 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7719 if (GET_CODE (addr) == LO_SUM)
7720 /* For lo_sum addresses, we must allow any offset except one that
7721 causes a wrap, so test only the low 16 bits. */
7722 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
/* Same range check as mem_operand_gpr: offset in [-0x8000, 0x8000 - extra).  */
7724 return offset + 0x8000 < 0x10000u - extra;
7727 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
/* Return true if MODE may use reg + constant-offset addressing (as
   opposed to being restricted to reg + reg).  */
7730 reg_offset_addressing_ok_p (machine_mode mode)
7744 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7745 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7746 a vector mode, if we want to use the VSX registers to move it around,
7747 we need to restrict ourselves to reg+reg addressing. Similarly for
7748 IEEE 128-bit floating point that is passed in a single vector
7750 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7751 return mode_supports_dq_form (mode);
7755 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7756 addressing for the LFIWZX and STFIWX instructions. */
7757 if (TARGET_NO_SDMODE_STACK)
/* Return true if OP is (or is based on, via a constant offset) one of
   the compiler's virtual stack-related registers, which are eliminated
   later in compilation.  */
7769 virtual_stack_registers_memory_p (rtx op)
7774 regnum = REGNO (op);
7776 else if (GET_CODE (op) == PLUS
7777 && REG_P (XEXP (op, 0))
7778 && CONST_INT_P (XEXP (op, 1)))
7779 regnum = REGNO (XEXP (op, 0));
7784 return (regnum >= FIRST_VIRTUAL_REGISTER
7785 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7788 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7789 is known to not straddle a 32k boundary. This function is used
7790 to determine whether -mcmodel=medium code can use TOC pointer
7791 relative addressing for OP. This means the alignment of the TOC
7792 pointer must also be taken into account, and unfortunately that is
7795 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7796 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7800 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7804 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7806 if (!SYMBOL_REF_P (op))
7809 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7811 if (mode_supports_dq_form (mode))
7814 dsize = GET_MODE_SIZE (mode);
7815 decl = SYMBOL_REF_DECL (op);
7821 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7822 replacing memory addresses with an anchor plus offset. We
7823 could find the decl by rummaging around in the block->objects
7824 VEC for the given offset but that seems like too much work. */
7825 dalign = BITS_PER_UNIT;
7826 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7827 && SYMBOL_REF_ANCHOR_P (op)
7828 && SYMBOL_REF_BLOCK (op) != NULL)
7830 struct object_block *block = SYMBOL_REF_BLOCK (op);
7832 dalign = block->alignment;
7833 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7835 else if (CONSTANT_POOL_ADDRESS_P (op))
7837 /* It would be nice to have get_pool_align().. */
7838 machine_mode cmode = get_pool_mode (op);
7840 dalign = GET_MODE_ALIGNMENT (cmode);
7843 else if (DECL_P (decl))
7845 dalign = DECL_ALIGN (decl);
7849 /* Allow BLKmode when the entire object is known to not
7850 cross a 32k boundary. */
7851 if (!DECL_SIZE_UNIT (decl))
7854 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7857 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
/* The alignment we can rely on is capped by the (assumed) TOC pointer
   alignment; for BLKmode require the whole object to fit within it.  */
7861 dalign /= BITS_PER_UNIT;
7862 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7863 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7864 return dalign >= dsize;
7870 /* Find how many bits of the alignment we know for this access. */
7871 dalign /= BITS_PER_UNIT;
7872 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7873 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
/* lsb isolates the lowest set bit of the offset — the finest alignment
   granule the offset itself preserves.  */
7875 lsb = offset & -offset;
7879 return dalign >= dsize;
/* Return true if OP is a reference to a constant pool entry whose
   constant ASM_OUTPUT_SPECIAL_POOL_ENTRY_P places in the TOC.  A
   constant addend on OP is ignored (split off by split_const).  */
7883 constant_pool_expr_p (rtx op)
7887 split_const (op, &base, &offset);
7888 return (SYMBOL_REF_P (base)
7889 && CONSTANT_POOL_ADDRESS_P (base)
7890 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7893 /* These are only used to pass through from print_operand/print_operand_address
7894 to rs6000_output_addr_const_extra over the intervening function
7895 output_addr_const which is not target code. */
/* NOTE(review): only meaningful while an address containing a tocrel
   UNSPEC is being printed.  */
7896 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7898 /* Return true if OP is a toc pointer relative address (the output
7899 of create_TOC_reference). If STRICT, do not match non-split
7900 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7901 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7902 TOCREL_OFFSET_RET respectively. */
7905 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7906 const_rtx *tocrel_offset_ret)
/* For medium/large code models a split TOC address appears as a
   LO_SUM anchored on a valid base register.  */
7911 if (TARGET_CMODEL != CMODEL_SMALL)
7913 /* When strict ensure we have everything tidy. */
7915 && !(GET_CODE (op) == LO_SUM
7916 && REG_P (XEXP (op, 0))
7917 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7920 /* When not strict, allow non-split TOC addresses and also allow
7921 (lo_sum (high ..)) TOC addresses created during reload. */
7922 if (GET_CODE (op) == LO_SUM)
7926 const_rtx tocrel_base = op;
7927 const_rtx tocrel_offset = const0_rtx;
/* Peel off a constant addend into TOCREL_OFFSET, if present.  */
7929 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7931 tocrel_base = XEXP (op, 0);
7932 tocrel_offset = XEXP (op, 1);
7935 if (tocrel_base_ret)
7936 *tocrel_base_ret = tocrel_base;
7937 if (tocrel_offset_ret)
7938 *tocrel_offset_ret = tocrel_offset;
/* The base must be the UNSPEC_TOCREL built by create_TOC_reference,
   with the TOC register as its second vector element.  */
7940 return (GET_CODE (tocrel_base) == UNSPEC
7941 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7942 && REG_P (XVECEXP (tocrel_base, 0, 1))
7943 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7946 /* Return true if X is a constant pool address, and also for cmodel=medium
7947 if X is a toc-relative address known to be offsettable within MODE. */
7950 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7953 const_rtx tocrel_base, tocrel_offset;
/* Medium model: a symbol that is not a TOC constant must additionally
   be provably offsettable (no 32k-boundary crossing).  */
7954 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7955 && (TARGET_CMODEL != CMODEL_MEDIUM
7956 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7958 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7959 INTVAL (tocrel_offset), mode)));
/* Return true if X is a small-data-area reference valid as an address
   for MODE: SVR4 ABI, non-pic, no TOC in use.  */
7963 legitimate_small_data_p (machine_mode mode, rtx x)
7965 return (DEFAULT_ABI == ABI_V4
7966 && !flag_pic && !TARGET_TOC
7967 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7968 && small_data_operand (x, mode));
/* Return true if X is a valid reg + constant-offset address for MODE.
   STRICT requires allocated hard base registers.  NOTE(review): the
   computation of EXTRA (headroom for multi-word accesses, keyed on
   WORST_CASE) is in lines elided from this view.  */
7972 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7973 bool strict, bool worst_case)
7975 unsigned HOST_WIDE_INT offset;
/* Shape check: (plus (reg) ...) with a usable base register.  */
7978 if (GET_CODE (x) != PLUS)
7980 if (!REG_P (XEXP (x, 0)))
7982 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
/* DQ-form modes have their own, stricter quad-address validation.  */
7984 if (mode_supports_dq_form (mode))
7985 return quad_address_p (x, mode, strict);
7986 if (!reg_offset_addressing_ok_p (mode))
7987 return virtual_stack_registers_memory_p (x);
7988 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7990 if (!CONST_INT_P (XEXP (x, 1)))
7993 offset = INTVAL (XEXP (x, 1));
8000 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8002 if (VECTOR_MEM_VSX_P (mode))
/* Multi-word modes: on 64-bit, DS-form ld/std needs a 4-byte-aligned
   offset; 32-bit targets reach the remaining words at offset+4...  */
8007 if (!TARGET_POWERPC64)
8009 else if (offset & 3)
8022 if (!TARGET_POWERPC64)
8024 else if (offset & 3)
/* 16-bit signed displacement check, with EXTRA bytes of headroom.  */
8033 return offset < 0x10000 - extra;
/* Return true if X is a valid reg+reg (x-form) indexed address.  */
8037 legitimate_indexed_address_p (rtx x, int strict)
8041 if (GET_CODE (x) != PLUS)
/* Either operand may serve as the base, the other as the index.  */
8047 return (REG_P (op0) && REG_P (op1)
8048 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8049 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8050 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8051 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
/* Return true if reg+reg addressing should be avoided for MODE
   (TARGET_AVOID_XFORM tuning).  */
8055 avoiding_indexed_address_p (machine_mode mode)
8057 /* Avoid indexed addressing for modes that have non-indexed
8058 load/store instruction forms. */
8059 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
/* Return true if X is a bare register usable for register-indirect
   addressing.  */
8063 legitimate_indirect_address_p (rtx x, int strict)
8065 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
/* Return true if X is an SImode Darwin-pic memory reference of the
   shape (mem (lo_sum (base-reg) (constant))).  */
8069 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8071 if (!TARGET_MACHO || !flag_pic
8072 || mode != SImode || !MEM_P (x))
8076 if (GET_CODE (x) != LO_SUM)
8078 if (!REG_P (XEXP (x, 0)))
8080 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
/* NOTE(review): lines elided here presumably strip the MEM and then
   the LO_SUM, so X is the low-part operand by this point — verify.  */
8084 return CONSTANT_P (x);
/* Return true if X is a valid LO_SUM address for MODE.  */
8088 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8090 if (GET_CODE (x) != LO_SUM)
8092 if (!REG_P (XEXP (x, 0)))
8094 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8096 /* quad word addresses are restricted, and we can't use LO_SUM. */
8097 if (mode_supports_dq_form (mode))
8101 if (TARGET_ELF || TARGET_MACHO)
8105 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8107 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8108 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8109 recognizes some LO_SUM addresses as valid although this
8110 function says opposite. In most cases, LRA through different
8111 transformations can generate correct code for address reloads.
8112 It cannot manage only some LO_SUM cases. So we need to add
8113 code analogous to one in rs6000_legitimize_reload_address for
8114 LOW_SUM here saying that some addresses are still valid. */
8115 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8116 && small_toc_ref (x, VOIDmode));
8117 if (TARGET_TOC && ! large_toc_ok)
/* Multi-unit modes, and wide modes without a hard-float register
   class to hold them, cannot use a LO_SUM displacement.  */
8119 if (GET_MODE_NUNITS (mode) != 1)
8121 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8122 && !(/* ??? Assume floating point reg based on mode? */
8123 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8126 return CONSTANT_P (x) || large_toc_ok;
8133 /* Try machine-dependent ways of modifying an illegitimate address
8134 to be legitimate. If we find one, return the new, valid address.
8135 This is used from only one place: `memory_address' in explow.c.
8137 OLDX is the address as it was before break_out_memory_refs was
8138 called. In some cases it is useful to look at this to decide what
8141 It is always safe for this function to do nothing. It exists to
8142 recognize opportunities to optimize the output.
8144 On RS/6000, first check for the sum of a register with a constant
8145 integer that is out of range. If so, generate code to add the
8146 constant with the low-order 16 bits masked to the register and force
8147 this result into another register (this can be done with `cau').
8148 Then generate an address of REG+(CONST&0xffff), allowing for the
8149 possibility of bit 16 being a one.
8151 Then check for the sum of a register and something not constant, try to
8152 load the other things into a register and return the sum. */
8155 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
/* Modes without reg+offset addressing (and DQ-form modes) are limited
   to reg or reg+reg addresses.  */
8160 if (!reg_offset_addressing_ok_p (mode)
8161 || mode_supports_dq_form (mode))
8163 if (virtual_stack_registers_memory_p (x))
8166 /* In theory we should not be seeing addresses of the form reg+0,
8167 but just in case it is generated, optimize it away. */
8168 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8169 return force_reg (Pmode, XEXP (x, 0));
8171 /* For TImode with load/store quad, restrict addresses to just a single
8172 pointer, so it works with both GPRs and VSX registers. */
8173 /* Make sure both operands are registers. */
8174 else if (GET_CODE (x) == PLUS
8175 && (mode != TImode || !TARGET_VSX))
8176 return gen_rtx_PLUS (Pmode,
8177 force_reg (Pmode, XEXP (x, 0)),
8178 force_reg (Pmode, XEXP (x, 1)));
8180 return force_reg (Pmode, x);
/* Thread-local symbols take the dedicated TLS legitimization path.  */
8182 if (SYMBOL_REF_P (x))
8184 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8186 return rs6000_legitimize_tls_address (x, model);
8198 /* As in legitimate_offset_address_p we do not assume
8199 worst-case. The mode here is just a hint as to the registers
8200 used. A TImode is usually in gprs, but may actually be in
8201 fprs. Leave worst-case scenario for reload to handle via
8202 insn constraints. PTImode is only GPRs. */
/* reg + out-of-range constant: split into a high part added with
   addis and a sign-extended 16-bit low part left in the address.  */
8209 if (GET_CODE (x) == PLUS
8210 && REG_P (XEXP (x, 0))
8211 && CONST_INT_P (XEXP (x, 1))
8212 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8213 >= 0x10000 - extra))
8215 HOST_WIDE_INT high_int, low_int;
8217 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8218 if (low_int >= 0x8000 - extra)
8220 high_int = INTVAL (XEXP (x, 1)) - low_int;
8221 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8222 GEN_INT (high_int)), 0);
8223 return plus_constant (Pmode, sum, low_int);
/* reg + non-constant addend: force the addend into a register to get
   a reg+reg address, unless indexed addressing is being avoided.  */
8225 else if (GET_CODE (x) == PLUS
8226 && REG_P (XEXP (x, 0))
8227 && !CONST_INT_P (XEXP (x, 1))
8228 && GET_MODE_NUNITS (mode) == 1
8229 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8230 || (/* ??? Assume floating point reg based on mode? */
8231 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8232 && !avoiding_indexed_address_p (mode))
8234 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8235 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
/* ELF/Darwin symbolic constants: materialize with a HIGH/LO_SUM
   pair.  */
8237 else if ((TARGET_ELF
8239 || !MACHO_DYNAMIC_NO_PIC_P
8246 && !CONST_WIDE_INT_P (x)
8247 && !CONST_DOUBLE_P (x)
8249 && GET_MODE_NUNITS (mode) == 1
8250 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8251 || (/* ??? Assume floating point reg based on mode? */
8252 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8254 rtx reg = gen_reg_rtx (Pmode);
8256 emit_insn (gen_elf_high (reg, x));
8258 emit_insn (gen_macho_high (reg, x));
8259 return gen_rtx_LO_SUM (Pmode, reg, x);
/* TOC-resident constants are addressed relative to the TOC pointer.  */
8263 && constant_pool_expr_p (x)
8264 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8265 return create_TOC_reference (x, NULL_RTX);
8270 /* Debug version of rs6000_legitimize_address. */
8272 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
/* Run the real legitimizer, capturing the insns it emits so they can
   be dumped alongside the before/after addresses.  */
8278 ret = rs6000_legitimize_address (x, oldx, mode);
8279 insns = get_insns ();
8285 "\nrs6000_legitimize_address: mode %s, old code %s, "
8286 "new code %s, modified\n",
8287 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8288 GET_RTX_NAME (GET_CODE (ret)));
8290 fprintf (stderr, "Original address:\n");
8293 fprintf (stderr, "oldx:\n");
8296 fprintf (stderr, "New address:\n");
8301 fprintf (stderr, "Insns added:\n");
8302 debug_rtx_list (insns, 20);
/* No change: report and fall through to returning the input.  */
8308 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8309 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8320 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8321 We need to emit DTP-relative relocations. */
8323 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8325 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Pick the assembler data directive matching SIZE.  */
8330 fputs ("\t.long\t", file);
8333 fputs (DOUBLE_INT_ASM_OP, file);
8338 output_addr_const (file, x);
/* ELF uses a biased @dtprel operand; XCOFF instead appends a
   relocation suffix chosen from the symbol's TLS model.  */
8340 fputs ("@dtprel+0x8000", file);
8341 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8343 switch (SYMBOL_REF_TLS_MODEL (x))
8347 case TLS_MODEL_LOCAL_EXEC:
8348 fputs ("@le", file);
8350 case TLS_MODEL_INITIAL_EXEC:
8351 fputs ("@ie", file);
8353 case TLS_MODEL_GLOBAL_DYNAMIC:
8354 case TLS_MODEL_LOCAL_DYNAMIC:
8363 /* Return true if X is a symbol that refers to real (rather than emulated)
   thread-local storage, i.e. its TLS model is at least TLS_MODEL_REAL.  */
8367 rs6000_real_tls_symbol_ref_p (rtx x)
8369 return (SYMBOL_REF_P (x)
8370 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8373 /* In the name of slightly smaller debug output, and to cater to
8374 general assembler lossage, recognize various UNSPEC sequences
8375 and turn them back into a direct symbol reference. */
8378 rs6000_delegitimize_address (rtx orig_x)
/* UNSPEC_FUSION_GPR wraps the real address; unwrap it first.  */
8382 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8383 orig_x = XVECEXP (orig_x, 0, 0);
8385 orig_x = delegitimize_mem_from_attrs (orig_x);
8392 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
/* Remember a constant addend so it can be re-applied after the
   UNSPEC_TOCREL wrapper has been stripped.  */
8396 if (GET_CODE (y) == PLUS
8397 && GET_MODE (y) == Pmode
8398 && CONST_INT_P (XEXP (y, 1)))
8400 offset = XEXP (y, 1);
8404 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8406 y = XVECEXP (y, 0, 0);
8409 /* Do not associate thread-local symbols with the original
8410 constant pool symbol. */
8413 && CONSTANT_POOL_ADDRESS_P (y)
8414 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8418 if (offset != NULL_RTX)
8419 y = gen_rtx_PLUS (Pmode, y, offset);
8420 if (!MEM_P (orig_x))
8423 return replace_equiv_address_nv (orig_x, y);
/* Darwin: strip an UNSPEC_MACHOPIC_OFFSET found under a LO_SUM.  */
8427 && GET_CODE (orig_x) == LO_SUM
8428 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8430 y = XEXP (XEXP (orig_x, 1), 0);
8431 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8432 return XVECEXP (y, 0, 0);
8438 /* Return true if X shouldn't be emitted into the debug info.
8439 The linker doesn't like .toc section references from
8440 .debug_* sections, so reject .toc section symbols. */
8443 rs6000_const_not_ok_for_debug_p (rtx x)
8445 if (GET_CODE (x) == UNSPEC)
8447 if (SYMBOL_REF_P (x)
8448 && CONSTANT_POOL_ADDRESS_P (x))
8450 rtx c = get_pool_constant (x);
8451 machine_mode cmode = get_pool_mode (x);
/* Constants special-cased into the TOC would become .toc refs.  */
8452 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8459 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8462 rs6000_legitimate_combined_insn (rtx_insn *insn)
8464 int icode = INSN_CODE (insn);
8466 /* Reject creating doloop insns. Combine should not be allowed
8467 to create these for a number of reasons:
8468 1) In a nested loop, if combine creates one of these in an
8469 outer loop and the register allocator happens to allocate ctr
8470 to the outer loop insn, then the inner loop can't use ctr.
8471 Inner loops ought to be more highly optimized.
8472 2) Combine often wants to create one of these from what was
8473 originally a three insn sequence, first combining the three
8474 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8475 allocated ctr, the splitter takes use back to the three insn
8476 sequence. It's better to stop combine at the two insn
8478 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8479 insns, the register allocator sometimes uses floating point
8480 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8481 jump insn and output reloads are not implemented for jumps,
8482 the ctrsi/ctrdi splitters need to handle all possible cases.
8483 That's a pain, and it gets to be seriously difficult when a
8484 splitter that runs after reload needs memory to transfer from
8485 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8486 for the difficult case. It's better to not create problems
8487 in the first place. */
/* Match every branch-decrement (bdz/bdnz) pattern variant.  */
8488 if (icode != CODE_FOR_nothing
8489 && (icode == CODE_FOR_bdz_si
8490 || icode == CODE_FOR_bdz_di
8491 || icode == CODE_FOR_bdnz_si
8492 || icode == CODE_FOR_bdnz_di
8493 || icode == CODE_FOR_bdztf_si
8494 || icode == CODE_FOR_bdztf_di
8495 || icode == CODE_FOR_bdnztf_si
8496 || icode == CODE_FOR_bdnztf_di))
8502 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8504 static GTY(()) rtx rs6000_tls_symbol;
/* Lazily create and cache the __tls_get_addr libfunc symbol.  */
8506 rs6000_tls_get_addr (void)
8508 if (!rs6000_tls_symbol)
8509 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8511 return rs6000_tls_symbol;
8514 /* Construct the SYMBOL_REF for TLS GOT references. */
8516 static GTY(()) rtx rs6000_got_symbol;
/* Lazily create and cache the _GLOBAL_OFFSET_TABLE_ symbol, flagged
   both local and external.  */
8518 rs6000_got_sym (void)
8520 if (!rs6000_got_symbol)
8522 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8523 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8524 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8527 return rs6000_got_symbol;
8530 /* AIX Thread-Local Address support. */
8533 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8535 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8539 name = XSTR (addr, 0);
8540 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8541 or the symbol will be in TLS private data section. */
8542 if (name[strlen (name) - 1] != ']'
8543 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8544 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8546 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8547 strcpy (tlsname, name);
/* [UL] marks uninitialized (bss) TLS data, [TL] initialized TLS.  */
8549 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8550 tlsaddr = copy_rtx (addr);
8551 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8556 /* Place addr into TOC constant pool. */
8557 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8559 /* Output the TOC entry and create the MEM referencing the value. */
8560 if (constant_pool_expr_p (XEXP (sym, 0))
8561 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8563 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8564 mem = gen_const_mem (Pmode, tocref);
8565 set_mem_alias_set (mem, get_TOC_alias_set ());
8570 /* Use global-dynamic for local-dynamic. */
8571 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8572 || model == TLS_MODEL_LOCAL_DYNAMIC)
8574 /* Create new TOC reference for @m symbol. */
8575 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8576 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8577 strcpy (tlsname, "*LCM");
/* Rename the "*LC.." TOC label tail to form the module-id entry.  */
8578 strcat (tlsname, name + 3);
8579 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8580 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8581 tocref = create_TOC_reference (modaddr, NULL_RTX);
8582 rtx modmem = gen_const_mem (Pmode, tocref);
8583 set_mem_alias_set (modmem, get_TOC_alias_set ());
8585 rtx modreg = gen_reg_rtx (Pmode);
8586 emit_insn (gen_rtx_SET (modreg, modmem));
8588 tmpreg = gen_reg_rtx (Pmode);
8589 emit_insn (gen_rtx_SET (tmpreg, mem));
8591 dest = gen_reg_rtx (Pmode);
/* Call __tls_get_addr (module id, offset); 32- or 64-bit variant.  */
8593 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8595 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8598 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8599 else if (TARGET_32BIT)
8601 tlsreg = gen_reg_rtx (SImode);
8602 emit_insn (gen_tls_get_tpointer (tlsreg));
8605 tlsreg = gen_rtx_REG (DImode, 13);
8607 /* Load the TOC value into temporary register. */
8608 tmpreg = gen_reg_rtx (Pmode);
8609 emit_insn (gen_rtx_SET (tmpreg, mem));
/* Annotate the load so optimizers know TMPREG == ADDR - TLSREG.  */
8610 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8611 gen_rtx_MINUS (Pmode, addr, tlsreg));
8613 /* Add TOC symbol value to TLS pointer. */
8614 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8619 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8620 __tls_get_addr call. */
8623 rs6000_output_tlsargs (rtx *operands)
8625 /* Set up operands for output_asm_insn, without modifying OPERANDS. */
8628 /* The set dest of the call, ie. r3, which is also the first arg reg. */
8629 op[0] = operands[0];
8630 /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
8631 op[1] = XVECEXP (operands[2], 0, 0);
8632 if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8634 /* The GOT register. */
8635 op[2] = XVECEXP (operands[2], 0, 1);
/* Global-dynamic: medium/large code models need a high/low
   addis+addi pair, the small model a single addi.  */
8636 if (TARGET_CMODEL != CMODEL_SMALL)
8637 output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8638 "addi %0,%0,%1@got@tlsgd@l", op);
8640 output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
/* Local-dynamic: same shape, using the %& (got) operand.  */
8642 else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8644 if (TARGET_CMODEL != CMODEL_SMALL)
8645 output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8646 "addi %0,%0,%&@got@tlsld@l", op);
8648 output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
8654 /* Passes the tls arg value for global dynamic and local dynamic
8655 emit_library_call_value in rs6000_legitimize_tls_address to
8656 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8657 marker relocs put on __tls_get_addr calls. */
/* NOTE(review): set just before and cleared just after each
   emit_library_call_value in rs6000_legitimize_tls_address.  */
8658 static rtx global_tlsarg;
8660 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8661 this (thread-local) address. */
8664 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
/* XCOFF targets have a dedicated path.  */
8669 return rs6000_legitimize_tls_address_aix (addr, model);
8671 dest = gen_reg_rtx (Pmode);
/* local-exec, 16-bit offset: one tprel add off the thread pointer
   (r13 in 64-bit mode, r2 in the 32-bit arm below).  */
8672 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8678 tlsreg = gen_rtx_REG (Pmode, 13);
8679 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8683 tlsreg = gen_rtx_REG (Pmode, 2);
8684 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
/* local-exec, 32-bit offset: high-adjusted add then low part.  */
8688 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8692 tmp = gen_reg_rtx (Pmode);
8695 tlsreg = gen_rtx_REG (Pmode, 13);
8696 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8700 tlsreg = gen_rtx_REG (Pmode, 2);
8701 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8705 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8707 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8712 rtx got, tga, tmp1, tmp2;
8714 /* We currently use relocations like @got@tlsgd for tls, which
8715 means the linker will handle allocation of tls entries, placing
8716 them in the .got section. So use a pointer to the .got section,
8717 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8718 or to secondary GOT sections used by 32-bit -fPIC. */
8720 got = gen_rtx_REG (Pmode, 2);
8724 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8727 rtx gsym = rs6000_got_sym ();
8728 got = gen_reg_rtx (Pmode);
8730 rs6000_emit_move (got, gsym, Pmode);
/* Materialize the GOT pointer by hand: take the address of a local
   label from LR and add the label-to-GOT displacement stored there.  */
8735 tmp1 = gen_reg_rtx (Pmode);
8736 tmp2 = gen_reg_rtx (Pmode);
8737 mem = gen_const_mem (Pmode, tmp1);
8738 lab = gen_label_rtx ();
8739 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8740 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
/* Skip over the 4-byte link-stack protection insn, if enabled.  */
8741 if (TARGET_LINK_STACK)
8742 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8743 emit_move_insn (tmp2, mem);
8744 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8745 set_unique_reg_note (last, REG_EQUAL, gsym);
8750 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8752 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8754 tga = rs6000_tls_get_addr ();
/* Stash the arg for rs6000_call_{aix,sysv} so the call site gets
   its __tls_get_addr marker reloc.  */
8755 global_tlsarg = arg;
8756 if (TARGET_TLS_MARKERS)
8758 rtx argreg = gen_rtx_REG (Pmode, 3);
8759 emit_insn (gen_rtx_SET (argreg, arg));
8760 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8764 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8765 global_tlsarg = NULL_RTX;
8767 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8769 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8770 tga = rs6000_tls_get_addr ();
8771 tmp1 = gen_reg_rtx (Pmode);
8772 global_tlsarg = arg;
8773 if (TARGET_TLS_MARKERS)
8775 rtx argreg = gen_rtx_REG (Pmode, 3);
8776 emit_insn (gen_rtx_SET (argreg, arg));
8777 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8781 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8782 global_tlsarg = NULL_RTX;
/* Add the dtprel offset to the module base returned by the call,
   using a 16-bit, 32-bit, or GOT-loaded offset per rs6000_tls_size.  */
8784 if (rs6000_tls_size == 16)
8787 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8789 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8791 else if (rs6000_tls_size == 32)
8793 tmp2 = gen_reg_rtx (Pmode);
8795 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8797 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8800 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8802 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8806 tmp2 = gen_reg_rtx (Pmode);
8808 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8810 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8812 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8818 /* IE, or 64-bit offset LE. */
8819 tmp2 = gen_reg_rtx (Pmode);
8821 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8823 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8826 insn = gen_tls_tls_64 (dest, tmp2, addr);
8828 insn = gen_tls_tls_32 (dest, tmp2, addr);
8836 /* Only create the global variable for the stack protect guard if we are using
8837 the global flavor of that guard. */
8839 rs6000_init_stack_protect_guard (void)
/* TLS-register-based guard flavors need no global symbol.  */
8841 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8842 return default_stack_protect_guard ();
8847 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8850 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
/* (high (unspec ...)) cannot be spilled to the constant pool.  */
8852 if (GET_CODE (x) == HIGH
8853 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8856 /* A TLS symbol in the TOC cannot contain a sum. */
8857 if (GET_CODE (x) == CONST
8858 && GET_CODE (XEXP (x, 0)) == PLUS
8859 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8860 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8863 /* Do not place an ELF TLS symbol in the constant pool. */
8864 return TARGET_ELF && tls_referenced_p (x);
8867 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8868 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8869 can be addressed relative to the toc pointer. */
8872 use_toc_relative_ref (rtx sym, machine_mode mode)
8874 return ((constant_pool_expr_p (sym)
8875 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8876 get_pool_mode (sym)))
8877 || (TARGET_CMODEL == CMODEL_MEDIUM
8878 && SYMBOL_REF_LOCAL_P (sym)
/* Size-capped so the access cannot cross a 32k boundary given the
   minimum guaranteed TOC pointer alignment.  */
8879 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8882 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8883 replace the input X, or the original X if no replacement is called for.
8884 The output parameter *WIN is 1 if the calling macro should goto WIN,
8887 For RS/6000, we wish to handle large displacements off a base
8888 register by splitting the addend across an addiu/addis and the mem insn.
8889 This cuts number of extra insns needed from 3 to 1.
8891 On Darwin, we use this to generate code for floating point constants.
8892 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8893 The Darwin code is inside #if TARGET_MACHO because only then are the
8894 machopic_* functions defined. */
8896 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8897 int opnum, int type,
8898 int ind_levels ATTRIBUTE_UNUSED, int *win)
8900 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8901 bool quad_offset_p = mode_supports_dq_form (mode);
8903 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
8904 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
8907 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8908 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
8909 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
8910 && TARGET_P9_VECTOR)
8911 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
8912 && TARGET_P9_VECTOR)))
8913 reg_offset_p = false;
8915 /* We must recognize output that we have already generated ourselves. */
8916 if (GET_CODE (x) == PLUS
8917 && GET_CODE (XEXP (x, 0)) == PLUS
8918 && REG_P (XEXP (XEXP (x, 0), 0))
8919 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8920 && CONST_INT_P (XEXP (x, 1)))
8922 if (TARGET_DEBUG_ADDR)
8924 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
8927 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8928 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8929 opnum, (enum reload_type) type);
8934 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8935 if (GET_CODE (x) == LO_SUM
8936 && GET_CODE (XEXP (x, 0)) == HIGH)
8938 if (TARGET_DEBUG_ADDR)
8940 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
8943 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8944 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8945 opnum, (enum reload_type) type);
/* Darwin pic FP constant revisit: accept an address this function
   built on an earlier invocation.  */
8951 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8952 && GET_CODE (x) == LO_SUM
8953 && GET_CODE (XEXP (x, 0)) == PLUS
8954 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8955 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8956 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8957 && machopic_operand_p (XEXP (x, 1)))
8959 /* Result of previous invocation of this function on Darwin
8960 floating point constant. */
8961 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8962 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8963 opnum, (enum reload_type) type);
/* Medium/large model small TOC refs: split into HIGH/LO_SUM and
   reload the high part.  */
8969 if (TARGET_CMODEL != CMODEL_SMALL
8972 && small_toc_ref (x, VOIDmode))
8974 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8975 x = gen_rtx_LO_SUM (Pmode, hi, x);
8976 if (TARGET_DEBUG_ADDR)
8978 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
8981 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8982 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8983 opnum, (enum reload_type) type);
/* Large displacement off a hard base register: split the constant
   into a reloadable high part and an in-insn 16-bit low part.  */
8988 if (GET_CODE (x) == PLUS
8989 && REG_P (XEXP (x, 0))
8990 && HARD_REGISTER_P (XEXP (x, 0))
8991 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8992 && CONST_INT_P (XEXP (x, 1))
8994 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8996 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8997 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8999 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9001 /* Check for 32-bit overflow or quad addresses with one of the
9002 four least significant bits set. */
9003 if (high + low != val
9004 || (quad_offset_p && (low & 0xf)))
9010 /* Reload the high part into a base reg; leave the low part
9011 in the mem directly. */
9013 x = gen_rtx_PLUS (GET_MODE (x),
9014 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9018 if (TARGET_DEBUG_ADDR)
9020 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9023 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9024 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9025 opnum, (enum reload_type) type);
/* Bare symbol: turn it into a HIGH/LO_SUM (Darwin pic uses the
   machopic offset form) and reload the high part.  */
9030 if (SYMBOL_REF_P (x)
9033 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9035 && DEFAULT_ABI == ABI_DARWIN
9036 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9037 && machopic_symbol_defined_p (x)
9039 && DEFAULT_ABI == ABI_V4
9042 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9043 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9045 ??? Assume floating point reg based on mode? This assumption is
9046 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9047 where reload ends up doing a DFmode load of a constant from
9048 mem using two gprs. Unfortunately, at this point reload
9049 hasn't yet selected regs so poking around in reload data
9050 won't help and even if we could figure out the regs reliably,
9051 we'd still want to allow this transformation when the mem is
9052 naturally aligned. Since we say the address is good here, we
9053 can't disable offsets from LO_SUMs in mem_operand_gpr.
9054 FIXME: Allow offset from lo_sum for other modes too, when
9055 mem is sufficiently aligned.
9057 Also disallow this if the type can go in VMX/Altivec registers, since
9058 those registers do not have d-form (reg+offset) address modes. */
9059 && !reg_addr[mode].scalar_in_vmx_p
9064 && (mode != TImode || !TARGET_VSX)
9066 && (mode != DImode || TARGET_POWERPC64)
9067 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9068 || TARGET_HARD_FLOAT))
9073 rtx offset = machopic_gen_offset (x);
9074 x = gen_rtx_LO_SUM (GET_MODE (x),
9075 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9076 gen_rtx_HIGH (Pmode, offset)), offset);
9080 x = gen_rtx_LO_SUM (GET_MODE (x),
9081 gen_rtx_HIGH (Pmode, x), x);
9083 if (TARGET_DEBUG_ADDR)
9085 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9088 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9089 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9090 opnum, (enum reload_type) type);
9095 /* Reload an offset address wrapped by an AND that represents the
9096 masking of the lower bits. Strip the outer AND and let reload
9097 convert the offset address into an indirect address. For VSX,
9098 force reload to create the address with an AND in a separate
9099 register, because we can't guarantee an altivec register will
9101 if (VECTOR_MEM_ALTIVEC_P (mode)
9102 && GET_CODE (x) == AND
9103 && GET_CODE (XEXP (x, 0)) == PLUS
9104 && REG_P (XEXP (XEXP (x, 0), 0))
9105 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9106 && CONST_INT_P (XEXP (x, 1))
9107 && INTVAL (XEXP (x, 1)) == -16)
/* Symbols addressable off the TOC: rebuild as a TOC reference; for
   medium/large models its high part then needs a reload too.  */
9118 && use_toc_relative_ref (x, mode))
9120 x = create_TOC_reference (x, NULL_RTX);
9121 if (TARGET_CMODEL != CMODEL_SMALL)
9123 if (TARGET_DEBUG_ADDR)
9125 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9128 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9129 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9130 opnum, (enum reload_type) type);
9139 /* Debug version of rs6000_legitimize_reload_address. */
9141 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9142 int opnum, int type,
9143 int ind_levels, int *win)
/* Delegate to the real routine, then dump inputs and result.  */
9145 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9148 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9149 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9150 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9154 fprintf (stderr, "Same address returned\n");
9156 fprintf (stderr, "NULL returned\n");
9159 fprintf (stderr, "New address:\n");
9166 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9167 that is a valid memory address for an instruction.
9168 The MODE argument is the machine mode for the MEM expression
9169 that wants to use this address.
9171 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9172 refers to a constant pool entry of an address (or the sum of it
9173 plus a constant), a short (16-bit signed) constant plus a register,
9174 the sum of two registers, or a register indirect, possibly with an
9175 auto-increment. For DFmode, DDmode and DImode with a constant plus
9176 register, we must ensure that both words are addressable or PowerPC64
9177 with offset word aligned.
9179 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9180 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9181 because adjacent memory cells are accessed by adding word-sized offsets
9182 during assembly output. */
9184 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
/* Cache per-mode addressing properties: whether reg+offset addressing is
   usable at all, and whether the ISA 3.0 DQ-form (quad offset) applies.  */
9186 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9187 bool quad_offset_p = mode_supports_dq_form (mode);
9189 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9190 if (VECTOR_MEM_ALTIVEC_P (mode)
9191 && GET_CODE (x) == AND
9192 && CONST_INT_P (XEXP (x, 1))
9193 && INTVAL (XEXP (x, 1)) == -16)
/* TLS symbol references are never valid addresses by themselves.  */
9196 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
/* Plain register indirect is always acceptable.  */
9198 if (legitimate_indirect_address_p (x, reg_ok_strict))
/* Pre-increment / pre-decrement of a valid base register.  */
9201 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9202 && mode_supports_pre_incdec_p (mode)
9203 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9205 /* Handle restricted vector d-form offsets in ISA 3.0. */
9208 if (quad_address_p (x, mode, reg_ok_strict))
9211 else if (virtual_stack_registers_memory_p (x))
9214 else if (reg_offset_p)
9216 if (legitimate_small_data_p (mode, x))
9218 if (legitimate_constant_pool_address_p (x, mode,
9219 reg_ok_strict || lra_in_progress))
9223 /* For TImode, if we have TImode in VSX registers, only allow register
9224 indirect addresses. This will allow the values to go in either GPRs
9225 or VSX registers without reloading. The vector types would tend to
9226 go into VSX registers, so we allow REG+REG, while TImode seems
9227 somewhat split, in that some uses are GPR based, and some VSX based. */
9228 /* FIXME: We could loosen this by changing the following to
9229 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9230 but currently we cannot allow REG+REG addressing for TImode. See
9231 PR72827 for complete details on how this ends up hoodwinking DSE. */
9232 if (mode == TImode && TARGET_VSX)
9234 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9237 && GET_CODE (x) == PLUS
9238 && REG_P (XEXP (x, 0))
9239 && (XEXP (x, 0) == virtual_stack_vars_rtx
9240 || XEXP (x, 0) == arg_pointer_rtx)
9241 && CONST_INT_P (XEXP (x, 1)))
9243 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
/* Indexed (reg+reg) addressing, excluded for the multi-register modes
   described in the header comment.  */
9245 if (!FLOAT128_2REG_P (mode)
9246 && (TARGET_HARD_FLOAT
9248 || (mode != DFmode && mode != DDmode))
9249 && (TARGET_POWERPC64 || mode != DImode)
9250 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9252 && !avoiding_indexed_address_p (mode)
9253 && legitimate_indexed_address_p (x, reg_ok_strict))
/* PRE_MODIFY: base register update form, where the new address must be
   a valid offset or indexed address built on the same base register.  */
9255 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9256 && mode_supports_pre_modify_p (mode)
9257 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9258 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9259 reg_ok_strict, false)
9260 || (!avoiding_indexed_address_p (mode)
9261 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9262 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
/* Finally, LO_SUM addresses (high/low address split), not usable with
   the DQ-form modes.  */
9264 if (reg_offset_p && !quad_offset_p
9265 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9270 /* Debug version of rs6000_legitimate_address_p. */
9272 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
/* Delegate to the real predicate, then dump the verdict together with
   the mode, strictness, reload phase and RTX code to stderr.  */
9275 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9277 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9278 "strict = %d, reload = %s, code = %s\n",
9279 ret ? "true" : "false",
9280 GET_MODE_NAME (mode),
9282 (reload_completed ? "after" : "before"),
9283 GET_RTX_NAME (GET_CODE (x)));
9289 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9292 rs6000_mode_dependent_address_p (const_rtx addr,
9293 addr_space_t as ATTRIBUTE_UNUSED)
/* Dispatch through a function pointer; presumably this lets a debug
   variant be installed at option-processing time — the same indirection
   scheme the other rs6000_debug_* wrappers in this file support.  */
9295 return rs6000_mode_dependent_address_ptr (addr);
9298 /* Go to LABEL if ADDR (a legitimate address expression)
9299 has an effect that depends on the machine mode it is used for.
9301 On the RS/6000 this is true of all integral offsets (since AltiVec
9302 and VSX modes don't allow them) or is a pre-increment or decrement.
9304 ??? Except that due to conceptual problems in offsettable_address_p
9305 we can't really report the problems of integral offsets. So leave
9306 this assuming that the adjustable offset must be valid for the
9307 sub-words of a TFmode operand, which is what we had before. */
9310 rs6000_mode_dependent_address (const_rtx addr)
/* Classify the address by its outermost RTX code.  */
9312 switch (GET_CODE (addr))
9315 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9316 is considered a legitimate address before reload, so there
9317 are no offset restrictions in that case. Note that this
9318 condition is safe in strict mode because any address involving
9319 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9320 been rejected as illegitimate. */
9321 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9322 && XEXP (addr, 0) != arg_pointer_rtx
9323 && CONST_INT_P (XEXP (addr, 1)))
/* Mode-dependent if the offset plus the worst-case sub-word adjustment
   (8 bytes on PowerPC64, 12 on 32-bit) would leave the signed 16-bit
   displacement range.  */
9325 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9326 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9331 /* Anything in the constant pool is sufficiently aligned that
9332 all bytes have the same high part address. */
9333 return !legitimate_constant_pool_address_p (addr, QImode, false);
9335 /* Auto-increment cases are now treated generically in recog.c. */
9337 return TARGET_UPDATE;
9339 /* AND is only allowed in Altivec loads. */
9350 /* Debug version of rs6000_mode_dependent_address. */
9352 rs6000_debug_mode_dependent_address (const_rtx addr)
/* Delegate to the real predicate and log its boolean verdict.  */
9354 bool ret = rs6000_mode_dependent_address (addr);
9356 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9357 ret ? "true" : "false");
9363 /* Implement FIND_BASE_TERM. */
9366 rs6000_find_base_term (rtx op)
/* Peel CONST and PLUS wrappers to expose a possible UNSPEC that hides
   the real base symbol from the alias analyzer.  */
9371 if (GET_CODE (base) == CONST)
9372 base = XEXP (base, 0);
9373 if (GET_CODE (base) == PLUS)
9374 base = XEXP (base, 0);
9375 if (GET_CODE (base) == UNSPEC)
9376 switch (XINT (base, 1))
9379 case UNSPEC_MACHOPIC_OFFSET:
9380 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9381 for aliasing purposes. */
9382 return XVECEXP (base, 0, 0);
9388 /* More elaborate version of recog's offsettable_memref_p predicate
9389 that works around the ??? note of rs6000_mode_dependent_address.
9390 In particular it accepts
9392 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9394 in 32-bit mode, that the recog predicate rejects. */
9397 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9404 /* First mimic offsettable_memref_p. */
9405 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9408 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9409 the latter predicate knows nothing about the mode of the memory
9410 reference and, therefore, assumes that it is the largest supported
9411 mode (TFmode). As a consequence, legitimate offsettable memory
9412 references are rejected. rs6000_legitimate_offset_address_p contains
9413 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9414 at least with a little bit of help here given that we know the
9415 actual registers used. */
/* "Worst case" here means REG_MODE forces the most restrictive offset
   requirements: 64-bit integer regs, or any 4-byte-wide register.  */
9416 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9417 || GET_MODE_SIZE (reg_mode) == 4);
9418 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9419 strict, worst_case);
9422 /* Determine the reassociation width to be used in reassociate_bb.
9423 This takes into account how many parallel operations we
9424 can actually do of a given type, and also the latency.
9428 vect add/sub/mul 2/cycle
9429 fp add/sub/mul 2/cycle
9434 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
/* Select the width based on the CPU being tuned for and the class of
   MODE (decimal float, vector, integer, binary float — checked in that
   order, so decimal float wins over the generic float test).  */
9437 switch (rs6000_tune)
9439 case PROCESSOR_POWER8:
9440 case PROCESSOR_POWER9:
9441 if (DECIMAL_FLOAT_MODE_P (mode))
9443 if (VECTOR_MODE_P (mode))
9445 if (INTEGRAL_MODE_P (mode))
9447 if (FLOAT_MODE_P (mode))
9456 /* Change register usage conditional on target flags. */
9458 rs6000_conditional_register_usage (void)
9462 if (TARGET_DEBUG_TARGET)
9463 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9465 /* Set MQ register fixed (already call_used) so that it will not be
9469 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9471 fixed_regs[13] = call_used_regs[13]
9472 = call_really_used_regs[13] = 1;
9474 /* Conditionally disable FPRs. */
9475 if (TARGET_SOFT_FLOAT)
9476 for (i = 32; i < 64; i++)
9477 fixed_regs[i] = call_used_regs[i]
9478 = call_really_used_regs[i] = 1;
9480 /* The TOC register is not killed across calls in a way that is
9481 visible to the compiler. */
9482 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9483 call_really_used_regs[2] = 0;
/* PIC register handling differs per ABI and PIC level: fully fixed for
   V4 -fPIC, fixed and call-clobbered for V4 -fpic and Darwin PIC.  */
9485 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9486 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9488 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9489 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9490 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9491 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9493 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9494 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9495 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9496 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9498 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9499 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9500 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
/* Without AltiVec/VSX, the vector registers and VRSAVE are unusable.  */
9502 if (!TARGET_ALTIVEC && !TARGET_VSX)
9504 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9505 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9506 call_really_used_regs[VRSAVE_REGNO] = 1;
9509 if (TARGET_ALTIVEC || TARGET_VSX)
9510 global_regs[VSCR_REGNO] = 1;
/* Under the AltiVec ABI, VR0..VR19 are call-clobbered.  */
9512 if (TARGET_ALTIVEC_ABI)
9514 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9515 call_used_regs[i] = call_really_used_regs[i] = 1;
9517 /* AIX reserves VR20:31 in non-extended ABI mode. */
9519 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9520 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9525 /* Output insns to set DEST equal to the constant SOURCE as a series of
9526 lis, ori and shl instructions and return TRUE. */
9529 rs6000_emit_set_const (rtx dest, rtx source)
9531 machine_mode mode = GET_MODE (dest);
9536 gcc_checking_assert (CONST_INT_P (source));
9537 c = INTVAL (source);
/* Simple case: the constant fits in a single move.  */
9542 emit_insn (gen_rtx_SET (dest, source));
/* SImode: load the upper halfword (lis) into a temp, then OR in the
   lower halfword (ori).  Reuse DEST as the temp when no pseudo can be
   created (e.g. during/after reload).  */
9546 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9548 emit_insn (gen_rtx_SET (copy_rtx (temp),
9549 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9550 emit_insn (gen_rtx_SET (dest,
9551 gen_rtx_IOR (SImode, copy_rtx (temp),
9552 GEN_INT (c & 0xffff))));
/* 64-bit value on a 32-bit target: split into two word moves, using
   WORDS_BIG_ENDIAN to pick which subword is the high part.  */
9556 if (!TARGET_POWERPC64)
9560 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9562 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9564 emit_move_insn (hi, GEN_INT (c >> 32));
/* Sign-extend the low 32 bits (the xor/sub idiom) before the move.  */
9565 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9566 emit_move_insn (lo, GEN_INT (c));
9569 rs6000_emit_set_long_const (dest, c);
/* Attach a REG_EQUAL note recording the full constant, so later passes
   know the final value even though it was built piecewise.  */
9576 insn = get_last_insn ();
9577 set = single_set (insn);
9578 if (! CONSTANT_P (SET_SRC (set)))
9579 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9584 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9585 Output insns to set DEST equal to the constant C as a series of
9586 lis, ori and shl instructions. */
9589 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
/* ud1..ud4 are the four 16-bit halfwords of C, from least to most
   significant.  The cases below pick the shortest lis/ori/shift
   sequence based on which halfwords are zero or sign-extensions.  */
9592 HOST_WIDE_INT ud1, ud2, ud3, ud4;
/* C fits in a sign-extended 16-bit immediate: one move.  */
9602 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9603 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9604 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
/* C fits in a sign-extended 32-bit value: lis + optional ori.  */
9606 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9607 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9609 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9611 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9612 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9614 emit_move_insn (dest,
9615 gen_rtx_IOR (DImode, copy_rtx (temp),
/* Upper 32 bits zero but ud2 has its sign bit set: build the 32-bit
   value, then zero-extend to clear the sign-extended high half.  */
9618 else if (ud3 == 0 && ud4 == 0)
9620 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9622 gcc_assert (ud2 & 0x8000);
9623 emit_move_insn (copy_rtx (temp),
9624 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9626 emit_move_insn (copy_rtx (temp),
9627 gen_rtx_IOR (DImode, copy_rtx (temp),
9629 emit_move_insn (dest,
9630 gen_rtx_ZERO_EXTEND (DImode,
9631 gen_lowpart (SImode,
/* C fits in a sign-extended 48-bit value: build the top 32 bits, shift
   left 16 (sldi), then OR in ud1 if needed.  */
9634 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9635 || (ud4 == 0 && ! (ud3 & 0x8000)))
9637 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9639 emit_move_insn (copy_rtx (temp),
9640 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9642 emit_move_insn (copy_rtx (temp),
9643 gen_rtx_IOR (DImode, copy_rtx (temp),
9645 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9646 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9649 emit_move_insn (dest,
9650 gen_rtx_IOR (DImode, copy_rtx (temp),
/* General case: lis/ori for ud4:ud3, shift left 32, then OR in ud2
   and ud1 as required — up to five instructions.  */
9655 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9657 emit_move_insn (copy_rtx (temp),
9658 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9660 emit_move_insn (copy_rtx (temp),
9661 gen_rtx_IOR (DImode, copy_rtx (temp),
9664 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9665 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9668 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9669 gen_rtx_IOR (DImode, copy_rtx (temp),
9670 GEN_INT (ud2 << 16)));
9672 emit_move_insn (dest,
9673 gen_rtx_IOR (DImode, copy_rtx (temp),
9678 /* Helper for the following. Get rid of [r+r] memory refs
9679 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9682 rs6000_eliminate_indexed_memrefs (rtx operands[2])
/* For each MEM operand whose address is neither a plain register nor a
   valid constant-pool address, force the address into a register and
   rebuild the MEM as a simple register-indirect reference.  */
9684 if (MEM_P (operands[0])
9685 && !REG_P (XEXP (operands[0], 0))
9686 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9687 GET_MODE (operands[0]), false))
9689 = replace_equiv_address (operands[0],
9690 copy_addr_to_reg (XEXP (operands[0], 0)));
9692 if (MEM_P (operands[1])
9693 && !REG_P (XEXP (operands[1], 0))
9694 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9695 GET_MODE (operands[1]), false))
9697 = replace_equiv_address (operands[1],
9698 copy_addr_to_reg (XEXP (operands[1], 0)));
9701 /* Generate a vector of constants to permute MODE for a little-endian
9702 storage operation by swapping the two halves of a vector. */
9704 rs6000_const_vec (machine_mode mode)
/* Build the selector {n/2, ..., n-1, 0, ..., n/2-1}: element i maps to
   the element in the opposite half of the vector.  */
9732 v = rtvec_alloc (subparts);
9734 for (i = 0; i < subparts / 2; ++i)
9735 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9736 for (i = subparts / 2; i < subparts; ++i)
9737 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9742 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9745 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9747 /* Scalar permutations are easier to express in integer modes rather than
9748 floating-point modes, so cast them here. We use V1TImode instead
9749 of TImode to ensure that the values don't go through GPRs. */
9750 if (FLOAT128_VECTOR_P (mode))
9752 dest = gen_lowpart (V1TImode, dest);
9753 source = gen_lowpart (V1TImode, source);
9757 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
/* For single-element 128-bit modes, a 64-bit rotate swaps the halves;
   otherwise emit a VEC_SELECT with the half-swapping selector built by
   rs6000_const_vec.  */
9759 if (mode == TImode || mode == V1TImode)
9760 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9764 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9765 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9769 /* Emit a little-endian load from vector memory location SOURCE to VSX
9770 register DEST in mode MODE. The load is done with two permuting
9771 insn's that represent an lxvd2x and xxpermdi. */
9773 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9775 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9777 if (mode == TImode || mode == V1TImode)
9780 dest = gen_lowpart (V2DImode, dest);
9781 source = adjust_address (source, V2DImode, 0);
/* Two half-swapping permutes compose to the identity on the data while
   modeling the lxvd2x + xxpermdi pair; the temp is skipped when pseudos
   cannot be created.  */
9784 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9785 rs6000_emit_le_vsx_permute (tmp, source, mode);
9786 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9789 /* Emit a little-endian store to vector memory location DEST from VSX
9790 register SOURCE in mode MODE. The store is done with two permuting
9791 insn's that represent an xxpermdi and an stxvd2x. */
9793 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9795 /* This should never be called during or after LRA, because it does
9796 not re-permute the source register. It is intended only for use
9798 gcc_assert (!lra_in_progress && !reload_completed);
9800 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9802 if (mode == TImode || mode == V1TImode)
9805 dest = adjust_address (dest, V2DImode, 0);
9806 source = gen_lowpart (V2DImode, source);
/* Mirror of rs6000_emit_le_vsx_load: permute into a temp (xxpermdi),
   then permute into memory (stxvd2x).  */
9809 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9810 rs6000_emit_le_vsx_permute (tmp, source, mode);
9811 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9814 /* Emit a sequence representing a little-endian VSX load or store,
9815 moving data from SOURCE to DEST in mode MODE. This is done
9816 separately from rs6000_emit_move to ensure it is called only
9817 during expand. LE VSX loads and stores introduced later are
9818 handled with a split. The expand-time RTL generation allows
9819 us to optimize away redundant pairs of register-permutes. */
9821 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
/* Preconditions: little-endian VSX without the ISA 3.0 (power9) native
   LE memory ops, not a GPR-to-GPR move, and exactly one side in memory
   (the xor enforces load-or-store, never mem-to-mem or reg-to-reg).  */
9823 gcc_assert (!BYTES_BIG_ENDIAN
9824 && VECTOR_MEM_VSX_P (mode)
9825 && !TARGET_P9_VECTOR
9826 && !gpr_or_gpr_p (dest, source)
9827 && (MEM_P (source) ^ MEM_P (dest)));
/* Load path: destination must already be a register (or subreg).  */
9831 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9832 rs6000_emit_le_vsx_load (dest, source, mode);
/* Store path: force the source into a register first.  */
9836 if (!REG_P (source))
9837 source = force_reg (mode, source);
9838 rs6000_emit_le_vsx_store (dest, source, mode);
9842 /* Return whether a SFmode or SImode move can be done without converting one
9843 mode to another. This arises when we have:
9845 (SUBREG:SF (REG:SI ...))
9846 (SUBREG:SI (REG:SF ...))
9848 and one of the values is in a floating point/vector register, where SFmode
9849 scalars are stored in DFmode format. */
9852 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
/* -mallow-sf-subreg: the user explicitly permits these subreg moves.  */
9854 if (TARGET_ALLOW_SF_SUBREG)
9857 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9860 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9863 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9864 if (SUBREG_P (dest))
9866 rtx dest_subreg = SUBREG_REG (dest);
9867 rtx src_subreg = SUBREG_REG (src);
9868 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9875 /* Helper function to change moves with:
9877 (SUBREG:SF (REG:SI)) and
9878 (SUBREG:SI (REG:SF))
9880 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9881 values are stored as DFmode values in the VSX registers. We need to convert
9882 the bits before we can use a direct move or operate on the bits in the
9883 vector register as an integer type.
9885 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
9888 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
/* Only before register allocation, with 64-bit direct moves available,
   and only when the source (not the destination) is the SF/SI subreg.  */
9890 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
9891 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9892 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9894 rtx inner_source = SUBREG_REG (source);
9895 machine_mode inner_mode = GET_MODE (inner_source);
/* Emit the dedicated bit-conversion patterns for each direction.  */
9897 if (mode == SImode && inner_mode == SFmode)
9899 emit_insn (gen_movsi_from_sf (dest, inner_source));
9903 if (mode == SFmode && inner_mode == SImode)
9905 emit_insn (gen_movsf_from_si (dest, inner_source));
9913 /* Emit a move from SOURCE to DEST in mode MODE. */
9915 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9919 operands[1] = source;
/* Optional tracing of every move under -mdebug=addr.  */
9921 if (TARGET_DEBUG_ADDR)
9924 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9925 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9926 GET_MODE_NAME (mode),
9929 can_create_pseudo_p ());
9931 fprintf (stderr, "source:\n");
9935 /* Check that we get CONST_WIDE_INT only when we should. */
9936 if (CONST_WIDE_INT_P (operands[1])
9937 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9940 #ifdef HAVE_AS_GNU_ATTRIBUTE
9941 /* If we use a long double type, set the flags in .gnu_attribute that say
9942 what the long double type is. This is to allow the linker's warning
9943 message for the wrong long double to be useful, even if the function does
9944 not do a call (for example, doing a 128-bit add on power9 if the long
9945 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
9946 used if they aren't the default long double type. */
9947 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9949 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9950 rs6000_passes_float = rs6000_passes_long_double = true;
9952 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9953 rs6000_passes_float = rs6000_passes_long_double = true;
9957 /* See if we need to special case SImode/SFmode SUBREG moves. */
9958 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9959 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9962 /* Check if GCC is setting up a block move that will end up using FP
9963 registers as temporaries. We must make sure this is acceptable. */
9964 if (MEM_P (operands[0])
9965 && MEM_P (operands[1])
9967 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9968 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9969 && ! (rs6000_slow_unaligned_access (SImode,
9970 (MEM_ALIGN (operands[0]) > 32
9971 ? 32 : MEM_ALIGN (operands[0])))
9972 || rs6000_slow_unaligned_access (SImode,
9973 (MEM_ALIGN (operands[1]) > 32
9974 ? 32 : MEM_ALIGN (operands[1]))))
9975 && ! MEM_VOLATILE_P (operands [0])
9976 && ! MEM_VOLATILE_P (operands [1]))
/* Split the DImode mem-to-mem copy into two SImode word moves.  */
9978 emit_move_insn (adjust_address (operands[0], SImode, 0),
9979 adjust_address (operands[1], SImode, 0));
9980 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9981 adjust_address (copy_rtx (operands[1]), SImode, 4));
/* Storing to memory: force a non-register source into a register.  */
9985 if (can_create_pseudo_p () && MEM_P (operands[0])
9986 && !gpc_reg_operand (operands[1], mode))
9987 operands[1] = force_reg (mode, operands[1]);
9989 /* Recognize the case where operand[1] is a reference to thread-local
9990 data and load its address to a register. */
9991 if (tls_referenced_p (operands[1]))
9993 enum tls_model model;
9994 rtx tmp = operands[1];
/* Peel off a constant addend (SYM + OFFSET) before legitimizing.  */
9997 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9999 addend = XEXP (XEXP (tmp, 0), 1);
10000 tmp = XEXP (XEXP (tmp, 0), 0);
10003 gcc_assert (SYMBOL_REF_P (tmp));
10004 model = SYMBOL_REF_TLS_MODEL (tmp);
10005 gcc_assert (model != 0);
10007 tmp = rs6000_legitimize_tls_address (tmp, model);
/* Re-apply the addend to the legitimized TLS address.  */
10010 tmp = gen_rtx_PLUS (mode, tmp, addend);
10011 tmp = force_operand (tmp, operands[0]);
10016 /* 128-bit constant floating-point values on Darwin should really be loaded
10017 as two parts. However, this premature splitting is a problem when DFmode
10018 values can go into Altivec registers. */
10019 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10020 && !reg_addr[DFmode].scalar_in_vmx_p)
/* Recurse on each DFmode half of the IBM 128-bit pair.  */
10022 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10023 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10025 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10026 GET_MODE_SIZE (DFmode)),
10027 simplify_gen_subreg (DFmode, operands[1], mode,
10028 GET_MODE_SIZE (DFmode)),
10033 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10034 p1:SD) if p1 is not of floating point class and p0 is spilled as
10035 we can have no analogous movsd_store for this. */
10036 if (lra_in_progress && mode == DDmode
10037 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10038 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10039 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10040 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10043 int regno = REGNO (SUBREG_REG (operands[1]));
/* Resolve a pseudo to its assigned (or representative) hard reg.  */
10045 if (!HARD_REGISTER_NUM_P (regno))
10047 cl = reg_preferred_class (regno);
10048 regno = reg_renumber[regno];
10050 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10052 if (regno >= 0 && ! FP_REGNO_P (regno))
10055 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10056 operands[1] = SUBREG_REG (operands[1]);
/* Spilled SDmode value whose source sits in a register: use the
   dedicated movsd_store / movsd_hardfloat patterns.  */
10059 if (lra_in_progress
10061 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10062 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10063 && (REG_P (operands[1])
10064 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10066 int regno = reg_or_subregno (operands[1]);
10069 if (!HARD_REGISTER_NUM_P (regno))
10071 cl = reg_preferred_class (regno);
10072 gcc_assert (cl != NO_REGS);
10073 regno = reg_renumber[regno];
10075 regno = ira_class_hard_regs[cl][0];
10077 if (FP_REGNO_P (regno))
10079 if (GET_MODE (operands[0]) != DDmode)
10080 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10081 emit_insn (gen_movsd_store (operands[0], operands[1]));
10083 else if (INT_REGNO_P (regno))
10084 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10089 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10090 p:DD)) if p0 is not of floating point class and p1 is spilled as
10091 we can have no analogous movsd_load for this. */
10092 if (lra_in_progress && mode == DDmode
10093 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10094 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10095 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10096 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10099 int regno = REGNO (SUBREG_REG (operands[0]));
10101 if (!HARD_REGISTER_NUM_P (regno))
10103 cl = reg_preferred_class (regno);
10104 regno = reg_renumber[regno];
10106 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10108 if (regno >= 0 && ! FP_REGNO_P (regno))
10111 operands[0] = SUBREG_REG (operands[0]);
10112 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
/* Mirror case: loading a spilled SDmode value into a register via the
   movsd_load / movsd_hardfloat patterns.  */
10115 if (lra_in_progress
10117 && (REG_P (operands[0])
10118 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10119 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10120 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10122 int regno = reg_or_subregno (operands[0]);
10125 if (!HARD_REGISTER_NUM_P (regno))
10127 cl = reg_preferred_class (regno);
10128 gcc_assert (cl != NO_REGS);
10129 regno = reg_renumber[regno];
10131 regno = ira_class_hard_regs[cl][0];
10133 if (FP_REGNO_P (regno))
10135 if (GET_MODE (operands[1]) != DDmode)
10136 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10137 emit_insn (gen_movsd_load (operands[0], operands[1]));
10139 else if (INT_REGNO_P (regno))
10140 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10146 /* FIXME: In the long term, this switch statement should go away
10147 and be replaced by a sequence of tests based on things like
/* Per-mode handling: constants that cannot be loaded directly are
   spilled to the constant pool.  */
10153 if (CONSTANT_P (operands[1])
10154 && !CONST_INT_P (operands[1]))
10155 operands[1] = force_const_mem (mode, operands[1]);
10162 if (FLOAT128_2REG_P (mode))
10163 rs6000_eliminate_indexed_memrefs (operands);
10170 if (CONSTANT_P (operands[1])
10171 && ! easy_fp_constant (operands[1], mode))
10172 operands[1] = force_const_mem (mode, operands[1]);
10182 if (CONSTANT_P (operands[1])
10183 && !easy_vector_constant (operands[1], mode))
10184 operands[1] = force_const_mem (mode, operands[1]);
10189 /* Use default pattern for address of ELF small data */
10192 && DEFAULT_ABI == ABI_V4
10193 && (SYMBOL_REF_P (operands[1])
10194 || GET_CODE (operands[1]) == CONST)
10195 && small_data_operand (operands[1], mode))
10197 emit_insn (gen_rtx_SET (operands[0], operands[1]));
/* SVR4 -fpic: load the address through the GOT.  */
10201 if (DEFAULT_ABI == ABI_V4
10202 && mode == Pmode && mode == SImode
10203 && flag_pic == 1 && got_operand (operands[1], mode))
10205 emit_insn (gen_movsi_got (operands[0], operands[1]));
/* ELF/Darwin non-PIC constant addresses: build with a high/low pair.  */
10209 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10213 && CONSTANT_P (operands[1])
10214 && GET_CODE (operands[1]) != HIGH
10215 && !CONST_INT_P (operands[1]))
10217 rtx target = (!can_create_pseudo_p ()
10219 : gen_reg_rtx (mode));
10221 /* If this is a function address on -mcall-aixdesc,
10222 convert it to the address of the descriptor. */
10223 if (DEFAULT_ABI == ABI_AIX
10224 && SYMBOL_REF_P (operands[1])
10225 && XSTR (operands[1], 0)[0] == '.')
10227 const char *name = XSTR (operands[1], 0);
/* Strip the leading dots to get the descriptor's symbol name, and copy
   the original SYMBOL_REF's flags onto the new reference.  */
10229 while (*name == '.')
10231 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10232 CONSTANT_POOL_ADDRESS_P (new_ref)
10233 = CONSTANT_POOL_ADDRESS_P (operands[1])
10234 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10235 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10236 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10237 operands[1] = new_ref;
10240 if (DEFAULT_ABI == ABI_DARWIN)
10243 if (MACHO_DYNAMIC_NO_PIC_P)
10245 /* Take care of any required data indirection. */
10246 operands[1] = rs6000_machopic_legitimize_pic_address (
10247 operands[1], mode, operands[0]);
10248 if (operands[0] != operands[1])
10249 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10253 emit_insn (gen_macho_high (target, operands[1]));
10254 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10258 emit_insn (gen_elf_high (target, operands[1]));
10259 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10263 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10264 and we have put it in the TOC, we just need to make a TOC-relative
10265 reference to it. */
10267 && SYMBOL_REF_P (operands[1])
10268 && use_toc_relative_ref (operands[1], mode))
10269 operands[1] = create_TOC_reference (operands[1], operands[0])
10270 else if (mode == Pmode
10271 && CONSTANT_P (operands[1])
10272 && GET_CODE (operands[1]) != HIGH
10273 && ((REG_P (operands[0])
10274 && FP_REGNO_P (REGNO (operands[0])))
10275 || !CONST_INT_P (operands[1])
10276 || (num_insns_constant (operands[1], mode)
10277 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10278 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10279 && (TARGET_CMODEL == CMODEL_SMALL
10280 || can_create_pseudo_p ()
10281 || (REG_P (operands[0])
10282 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10286 /* Darwin uses a special PIC legitimizer. */
10287 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10290 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10292 if (operands[0] != operands[1])
10293 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10298 /* If we are to limit the number of things we put in the TOC and
10299 this is a symbol plus a constant we can add in one insn,
10300 just put the symbol in the TOC and add the constant. */
10301 if (GET_CODE (operands[1]) == CONST
10302 && TARGET_NO_SUM_IN_TOC
10303 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10304 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10305 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10306 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10307 && ! side_effects_p (operands[0]))
10310 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10311 rtx other = XEXP (XEXP (operands[1], 0), 1);
10313 sym = force_reg (mode, sym);
10314 emit_insn (gen_add3_insn (operands[0], sym, other));
/* Otherwise drop the constant into the pool and, when the pool entry
   itself is reachable through the TOC, rewrite the MEM to use a
   TOC-relative address with the TOC alias set.  */
10318 operands[1] = force_const_mem (mode, operands[1]);
10321 && SYMBOL_REF_P (XEXP (operands[1], 0))
10322 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10324 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10326 operands[1] = gen_const_mem (mode, tocref);
10327 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10333 if (!VECTOR_MEM_VSX_P (TImode))
10334 rs6000_eliminate_indexed_memrefs (operands);
10338 rs6000_eliminate_indexed_memrefs (operands);
/* Unhandled mode: this is a compiler bug, abort with the insn.  */
10342 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10345 /* Above, we may have called force_const_mem which may have returned
10346 an invalid address. If we can, fix this up; otherwise, reload will
10347 have to deal with it. */
10348 if (MEM_P (operands[1]))
10349 operands[1] = validize_mem (operands[1]);
10351 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10354 /* Nonzero if we can use a floating-point register to pass this arg. */
10355 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10356 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10357 && (CUM)->fregno <= FP_ARG_MAX_REG \
10358 && TARGET_HARD_FLOAT)
10360 /* Nonzero if we can use an AltiVec register to pass this arg. */
10361 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10362 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10363 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10364 && TARGET_ALTIVEC_ABI \
10367 /* Walk down the type tree of TYPE counting consecutive base elements.
10368 If *MODEP is VOIDmode, then set it to the first valid floating point
10369 or vector type. If a non-floating point or vector type is found, or
10370 if a floating point or vector type that doesn't match a non-VOIDmode
10371 *MODEP is found, then return -1, otherwise return the count in the
10375 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10378 HOST_WIDE_INT size;
10380 switch (TREE_CODE (type))
10383 mode = TYPE_MODE (type);
10384 if (!SCALAR_FLOAT_MODE_P (mode))
10387 if (*modep == VOIDmode)
10390 if (*modep == mode)
10396 mode = TYPE_MODE (TREE_TYPE (type));
10397 if (!SCALAR_FLOAT_MODE_P (mode))
10400 if (*modep == VOIDmode)
10403 if (*modep == mode)
10409 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10412 /* Use V4SImode as representative of all 128-bit vector types. */
10413 size = int_size_in_bytes (type);
10423 if (*modep == VOIDmode)
10426 /* Vector modes are considered to be opaque: two vectors are
10427 equivalent for the purposes of being homogeneous aggregates
10428 if they are the same size. */
10429 if (*modep == mode)
10437 tree index = TYPE_DOMAIN (type);
10439 /* Can't handle incomplete types nor sizes that are not
10441 if (!COMPLETE_TYPE_P (type)
10442 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10445 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10448 || !TYPE_MAX_VALUE (index)
10449 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10450 || !TYPE_MIN_VALUE (index)
10451 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10455 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10456 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10458 /* There must be no padding. */
10459 if (wi::to_wide (TYPE_SIZE (type))
10460 != count * GET_MODE_BITSIZE (*modep))
10472 /* Can't handle incomplete types nor sizes that are not
10474 if (!COMPLETE_TYPE_P (type)
10475 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10478 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10480 if (TREE_CODE (field) != FIELD_DECL)
10483 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10486 count += sub_count;
10489 /* There must be no padding. */
10490 if (wi::to_wide (TYPE_SIZE (type))
10491 != count * GET_MODE_BITSIZE (*modep))
10498 case QUAL_UNION_TYPE:
10500 /* These aren't very interesting except in a degenerate case. */
10505 /* Can't handle incomplete types nor sizes that are not
10507 if (!COMPLETE_TYPE_P (type)
10508 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10511 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10513 if (TREE_CODE (field) != FIELD_DECL)
10516 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10519 count = count > sub_count ? count : sub_count;
10522 /* There must be no padding. */
10523 if (wi::to_wide (TYPE_SIZE (type))
10524 != count * GET_MODE_BITSIZE (*modep))
10537 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10538 float or vector aggregate that shall be passed in FP/vector registers
10539 according to the ELFv2 ABI, return the homogeneous element mode in
10540 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10542 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10545 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10546 machine_mode *elt_mode,
10549 /* Note that we do not accept complex types at the top level as
10550 homogeneous aggregates; these types are handled via the
10551 targetm.calls.split_complex_arg mechanism. Complex types
10552 can be elements of homogeneous aggregates, however. */
10553 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10554 && AGGREGATE_TYPE_P (type))
10556 machine_mode field_mode = VOIDmode;
10557 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10559 if (field_count > 0)
10561 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10562 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10564 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10565 up to AGGR_ARG_NUM_REG registers. */
10566 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10569 *elt_mode = field_mode;
10571 *n_elts = field_count;
10584 /* Return a nonzero value to say to return the function value in
10585 memory, just as large structures are always returned. TYPE will be
10586 the data type of the value, and FNTYPE will be the type of the
10587 function doing the returning, or @code{NULL} for libcalls.
10589 The AIX ABI for the RS/6000 specifies that all structures are
10590 returned in memory. The Darwin ABI does the same.
10592 For the Darwin 64 Bit ABI, a function result can be returned in
10593 registers or in memory, depending on the size of the return data
10594 type. If it is returned in registers, the value occupies the same
10595 registers as it would if it were the first and only function
10596 argument. Otherwise, the function places its result in memory at
10597 the location pointed to by GPR3.
10599 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10600 but a draft put them in memory, and GCC used to implement the draft
10601 instead of the final standard. Therefore, aix_struct_return
10602 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10603 compatibility can change DRAFT_V4_STRUCT_RET to override the
10604 default, and -m switches get the final word. See
10605 rs6000_option_override_internal for more details.
10607 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10608 long double support is enabled. These values are returned in memory.
10610 int_size_in_bytes returns -1 for variable size objects, which go in
10611 memory always. The cast to unsigned makes -1 > 8. */
10614 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10616 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10618 && rs6000_darwin64_abi
10619 && TREE_CODE (type) == RECORD_TYPE
10620 && int_size_in_bytes (type) > 0)
10622 CUMULATIVE_ARGS valcum;
10626 valcum.fregno = FP_ARG_MIN_REG;
10627 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10628 /* Do a trial code generation as if this were going to be passed
10629 as an argument; if any part goes in memory, we return NULL. */
10630 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10633 /* Otherwise fall through to more conventional ABI rules. */
10636 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
10637 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10641 /* The ELFv2 ABI returns aggregates up to 16B in registers */
10642 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10643 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10646 if (AGGREGATE_TYPE_P (type)
10647 && (aix_struct_return
10648 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10651 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10652 modes only exist for GCC vector types if -maltivec. */
10653 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10654 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10657 /* Return synthetic vectors in memory. */
10658 if (TREE_CODE (type) == VECTOR_TYPE
10659 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10661 static bool warned_for_return_big_vectors = false;
10662 if (!warned_for_return_big_vectors)
10664 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10665 "non-standard ABI extension with no compatibility "
10667 warned_for_return_big_vectors = true;
10672 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10673 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10679 /* Specify whether values returned in registers should be at the most
10680 significant end of a register. We want aggregates returned by
10681 value to match the way aggregates are passed to functions. */
10684 rs6000_return_in_msb (const_tree valtype)
10686 return (DEFAULT_ABI == ABI_ELFv2
10687 && BYTES_BIG_ENDIAN
10688 && AGGREGATE_TYPE_P (valtype)
10689 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10693 #ifdef HAVE_AS_GNU_ATTRIBUTE
10694 /* Return TRUE if a call to function FNDECL may be one that
10695 potentially affects the function calling ABI of the object file. */
10698 call_ABI_of_interest (tree fndecl)
10700 if (rs6000_gnu_attr && symtab->state == EXPANSION)
10702 struct cgraph_node *c_node;
10704 /* Libcalls are always interesting. */
10705 if (fndecl == NULL_TREE)
10708 /* Any call to an external function is interesting. */
10709 if (DECL_EXTERNAL (fndecl))
10712 /* Interesting functions that we are emitting in this object file. */
10713 c_node = cgraph_node::get (fndecl);
10714 c_node = c_node->ultimate_alias_target ();
10715 return !c_node->only_called_directly_p ();
10721 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10722 for a call to a function whose data type is FNTYPE.
10723 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10725 For incoming args we set the number of arguments in the prototype large
10726 so we never return a PARALLEL. */
10729 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10730 rtx libname ATTRIBUTE_UNUSED, int incoming,
10731 int libcall, int n_named_args,
10733 machine_mode return_mode ATTRIBUTE_UNUSED)
10735 static CUMULATIVE_ARGS zero_cumulative;
10737 *cum = zero_cumulative;
10739 cum->fregno = FP_ARG_MIN_REG;
10740 cum->vregno = ALTIVEC_ARG_MIN_REG;
10741 cum->prototype = (fntype && prototype_p (fntype));
10742 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10743 ? CALL_LIBCALL : CALL_NORMAL);
10744 cum->sysv_gregno = GP_ARG_MIN_REG;
10745 cum->stdarg = stdarg_p (fntype);
10746 cum->libcall = libcall;
10748 cum->nargs_prototype = 0;
10749 if (incoming || cum->prototype)
10750 cum->nargs_prototype = n_named_args;
10752 /* Check for a longcall attribute. */
10753 if ((!fntype && rs6000_default_long_calls)
10755 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10756 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10757 cum->call_cookie |= CALL_LONG;
10758 else if (DEFAULT_ABI != ABI_DARWIN)
10760 bool is_local = (fndecl
10761 && !DECL_EXTERNAL (fndecl)
10762 && !DECL_WEAK (fndecl)
10763 && (*targetm.binds_local_p) (fndecl));
10769 && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10770 cum->call_cookie |= CALL_LONG;
10775 && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10776 cum->call_cookie |= CALL_LONG;
10780 if (TARGET_DEBUG_ARG)
10782 fprintf (stderr, "\ninit_cumulative_args:");
10785 tree ret_type = TREE_TYPE (fntype);
10786 fprintf (stderr, " ret code = %s,",
10787 get_tree_code_name (TREE_CODE (ret_type)));
10790 if (cum->call_cookie & CALL_LONG)
10791 fprintf (stderr, " longcall,");
10793 fprintf (stderr, " proto = %d, nargs = %d\n",
10794 cum->prototype, cum->nargs_prototype);
10797 #ifdef HAVE_AS_GNU_ATTRIBUTE
10798 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10800 cum->escapes = call_ABI_of_interest (fndecl);
10807 return_type = TREE_TYPE (fntype);
10808 return_mode = TYPE_MODE (return_type);
10811 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10813 if (return_type != NULL)
10815 if (TREE_CODE (return_type) == RECORD_TYPE
10816 && TYPE_TRANSPARENT_AGGR (return_type))
10818 return_type = TREE_TYPE (first_field (return_type));
10819 return_mode = TYPE_MODE (return_type);
10821 if (AGGREGATE_TYPE_P (return_type)
10822 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10824 rs6000_returns_struct = true;
10826 if (SCALAR_FLOAT_MODE_P (return_mode))
10828 rs6000_passes_float = true;
10829 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10830 && (FLOAT128_IBM_P (return_mode)
10831 || FLOAT128_IEEE_P (return_mode)
10832 || (return_type != NULL
10833 && (TYPE_MAIN_VARIANT (return_type)
10834 == long_double_type_node))))
10835 rs6000_passes_long_double = true;
10837 /* Note if we passed or return a IEEE 128-bit type. We changed
10838 the mangling for these types, and we may need to make an alias
10839 with the old mangling. */
10840 if (FLOAT128_IEEE_P (return_mode))
10841 rs6000_passes_ieee128 = true;
10843 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10844 rs6000_passes_vector = true;
10851 && TARGET_ALTIVEC_ABI
10852 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10854 error ("cannot return value in vector register because"
10855 " altivec instructions are disabled, use %qs"
10856 " to enable them", "-maltivec");
10860 /* The mode the ABI uses for a word. This is not the same as word_mode
10861 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10863 static scalar_int_mode
10864 rs6000_abi_word_mode (void)
10866 return TARGET_32BIT ? SImode : DImode;
10869 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10871 rs6000_offload_options (void)
10874 return xstrdup ("-foffload-abi=lp64");
10876 return xstrdup ("-foffload-abi=ilp32");
10879 /* On rs6000, function arguments are promoted, as are function return
10882 static machine_mode
10883 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10885 int *punsignedp ATTRIBUTE_UNUSED,
10888 PROMOTE_MODE (mode, *punsignedp, type);
10893 /* Return true if TYPE must be passed on the stack and not in registers. */
10896 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10898 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10899 return must_pass_in_stack_var_size (mode, type);
10901 return must_pass_in_stack_var_size_or_pad (mode, type);
10905 is_complex_IBM_long_double (machine_mode mode)
10907 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10910 /* Whether ABI_V4 passes MODE args to a function in floating point
10914 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10916 if (!TARGET_HARD_FLOAT)
10918 if (mode == DFmode)
10920 if (mode == SFmode && named)
10922 /* ABI_V4 passes complex IBM long double in 8 gprs.
10923 Stupid, but we can't change the ABI now. */
10924 if (is_complex_IBM_long_double (mode))
10926 if (FLOAT128_2REG_P (mode))
10928 if (DECIMAL_FLOAT_MODE_P (mode))
10933 /* Implement TARGET_FUNCTION_ARG_PADDING.
10935 For the AIX ABI structs are always stored left shifted in their
10938 static pad_direction
10939 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10941 #ifndef AGGREGATE_PADDING_FIXED
10942 #define AGGREGATE_PADDING_FIXED 0
10944 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10945 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10948 if (!AGGREGATE_PADDING_FIXED)
10950 /* GCC used to pass structures of the same size as integer types as
10951 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10952 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10953 passed padded downward, except that -mstrict-align further
10954 muddied the water in that multi-component structures of 2 and 4
10955 bytes in size were passed padded upward.
10957 The following arranges for best compatibility with previous
10958 versions of gcc, but removes the -mstrict-align dependency. */
10959 if (BYTES_BIG_ENDIAN)
10961 HOST_WIDE_INT size = 0;
10963 if (mode == BLKmode)
10965 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10966 size = int_size_in_bytes (type);
10969 size = GET_MODE_SIZE (mode);
10971 if (size == 1 || size == 2 || size == 4)
10972 return PAD_DOWNWARD;
10977 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10979 if (type != 0 && AGGREGATE_TYPE_P (type))
10983 /* Fall back to the default. */
10984 return default_function_arg_padding (mode, type);
10987 /* If defined, a C expression that gives the alignment boundary, in bits,
10988 of an argument with the specified mode and type. If it is not defined,
10989 PARM_BOUNDARY is used for all arguments.
10991 V.4 wants long longs and doubles to be double word aligned. Just
10992 testing the mode size is a boneheaded way to do this as it means
10993 that other types such as complex int are also double word aligned.
10994 However, we're stuck with this because changing the ABI might break
10995 existing library interfaces.
10997 Quadword align Altivec/VSX vectors.
10998 Quadword align large synthetic vector types. */
11000 static unsigned int
11001 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11003 machine_mode elt_mode;
11006 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11008 if (DEFAULT_ABI == ABI_V4
11009 && (GET_MODE_SIZE (mode) == 8
11010 || (TARGET_HARD_FLOAT
11011 && !is_complex_IBM_long_double (mode)
11012 && FLOAT128_2REG_P (mode))))
11014 else if (FLOAT128_VECTOR_P (mode))
11016 else if (type && TREE_CODE (type) == VECTOR_TYPE
11017 && int_size_in_bytes (type) >= 8
11018 && int_size_in_bytes (type) < 16)
11020 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11021 || (type && TREE_CODE (type) == VECTOR_TYPE
11022 && int_size_in_bytes (type) >= 16))
11025 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11026 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11027 -mcompat-align-parm is used. */
11028 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11029 || DEFAULT_ABI == ABI_ELFv2)
11030 && type && TYPE_ALIGN (type) > 64)
11032 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11033 or homogeneous float/vector aggregates here. We already handled
11034 vector aggregates above, but still need to check for float here. */
11035 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11036 && !SCALAR_FLOAT_MODE_P (elt_mode));
11038 /* We used to check for BLKmode instead of the above aggregate type
11039 check. Warn when this results in any difference to the ABI. */
11040 if (aggregate_p != (mode == BLKmode))
11042 static bool warned;
11043 if (!warned && warn_psabi)
11046 inform (input_location,
11047 "the ABI of passing aggregates with %d-byte alignment"
11048 " has changed in GCC 5",
11049 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11057 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11058 implement the "aggregate type" check as a BLKmode check here; this
11059 means certain aggregate types are in fact not aligned. */
11060 if (TARGET_MACHO && rs6000_darwin64_abi
11062 && type && TYPE_ALIGN (type) > 64)
11065 return PARM_BOUNDARY;
11068 /* The offset in words to the start of the parameter save area. */
11070 static unsigned int
11071 rs6000_parm_offset (void)
11073 return (DEFAULT_ABI == ABI_V4 ? 2
11074 : DEFAULT_ABI == ABI_ELFv2 ? 4
11078 /* For a function parm of MODE and TYPE, return the starting word in
11079 the parameter area. NWORDS of the parameter area are already used. */
11081 static unsigned int
11082 rs6000_parm_start (machine_mode mode, const_tree type,
11083 unsigned int nwords)
11085 unsigned int align;
11087 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11088 return nwords + (-(rs6000_parm_offset () + nwords) & align);
11091 /* Compute the size (in words) of a function argument. */
11093 static unsigned long
11094 rs6000_arg_size (machine_mode mode, const_tree type)
11096 unsigned long size;
11098 if (mode != BLKmode)
11099 size = GET_MODE_SIZE (mode);
11101 size = int_size_in_bytes (type);
11104 return (size + 3) >> 2;
11106 return (size + 7) >> 3;
11109 /* Use this to flush pending int fields. */
11112 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11113 HOST_WIDE_INT bitpos, int final)
11115 unsigned int startbit, endbit;
11116 int intregs, intoffset;
11118 /* Handle the situations where a float is taking up the first half
11119 of the GPR, and the other half is empty (typically due to
11120 alignment restrictions). We can detect this by a 8-byte-aligned
11121 int field, or by seeing that this is the final flush for this
11122 argument. Count the word and continue on. */
11123 if (cum->floats_in_gpr == 1
11124 && (cum->intoffset % 64 == 0
11125 || (cum->intoffset == -1 && final)))
11128 cum->floats_in_gpr = 0;
11131 if (cum->intoffset == -1)
11134 intoffset = cum->intoffset;
11135 cum->intoffset = -1;
11136 cum->floats_in_gpr = 0;
11138 if (intoffset % BITS_PER_WORD != 0)
11140 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11141 if (!int_mode_for_size (bits, 0).exists ())
11143 /* We couldn't find an appropriate mode, which happens,
11144 e.g., in packed structs when there are 3 bytes to load.
11145 Back intoffset back to the beginning of the word in this
11147 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11151 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11152 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11153 intregs = (endbit - startbit) / BITS_PER_WORD;
11154 cum->words += intregs;
11155 /* words should be unsigned. */
11156 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11158 int pad = (endbit/BITS_PER_WORD) - cum->words;
11163 /* The darwin64 ABI calls for us to recurse down through structs,
11164 looking for elements passed in registers. Unfortunately, we have
11165 to track int register count here also because of misalignments
11166 in powerpc alignment mode. */
11169 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11171 HOST_WIDE_INT startbitpos)
11175 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11176 if (TREE_CODE (f) == FIELD_DECL)
11178 HOST_WIDE_INT bitpos = startbitpos;
11179 tree ftype = TREE_TYPE (f);
11181 if (ftype == error_mark_node)
11183 mode = TYPE_MODE (ftype);
11185 if (DECL_SIZE (f) != 0
11186 && tree_fits_uhwi_p (bit_position (f)))
11187 bitpos += int_bit_position (f);
11189 /* ??? FIXME: else assume zero offset. */
11191 if (TREE_CODE (ftype) == RECORD_TYPE)
11192 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11193 else if (USE_FP_FOR_ARG_P (cum, mode))
11195 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11196 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11197 cum->fregno += n_fpregs;
11198 /* Single-precision floats present a special problem for
11199 us, because they are smaller than an 8-byte GPR, and so
11200 the structure-packing rules combined with the standard
11201 varargs behavior mean that we want to pack float/float
11202 and float/int combinations into a single register's
11203 space. This is complicated by the arg advance flushing,
11204 which works on arbitrarily large groups of int-type
11206 if (mode == SFmode)
11208 if (cum->floats_in_gpr == 1)
11210 /* Two floats in a word; count the word and reset
11211 the float count. */
11213 cum->floats_in_gpr = 0;
11215 else if (bitpos % 64 == 0)
11217 /* A float at the beginning of an 8-byte word;
11218 count it and put off adjusting cum->words until
11219 we see if a arg advance flush is going to do it
11221 cum->floats_in_gpr++;
11225 /* The float is at the end of a word, preceded
11226 by integer fields, so the arg advance flush
11227 just above has already set cum->words and
11228 everything is taken care of. */
11232 cum->words += n_fpregs;
11234 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11236 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11240 else if (cum->intoffset == -1)
11241 cum->intoffset = bitpos;
11245 /* Check for an item that needs to be considered specially under the darwin 64
11246 bit ABI. These are record types where the mode is BLK or the structure is
11247 8 bytes in size. */
11249 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11251 return rs6000_darwin64_abi
11252 && ((mode == BLKmode
11253 && TREE_CODE (type) == RECORD_TYPE
11254 && int_size_in_bytes (type) > 0)
11255 || (type && TREE_CODE (type) == RECORD_TYPE
11256 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11259 /* Update the data in CUM to advance over an argument
11260 of mode MODE and data type TYPE.
11261 (TYPE is null for libcalls where that information may not be available.)
11263 Note that for args passed by reference, function_arg will be called
11264 with MODE and TYPE set to that of the pointer to the arg, not the arg
11268 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11269 const_tree type, bool named, int depth)
11271 machine_mode elt_mode;
11274 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11276 /* Only tick off an argument if we're not recursing. */
11278 cum->nargs_prototype--;
11280 #ifdef HAVE_AS_GNU_ATTRIBUTE
11281 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11284 if (SCALAR_FLOAT_MODE_P (mode))
11286 rs6000_passes_float = true;
11287 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11288 && (FLOAT128_IBM_P (mode)
11289 || FLOAT128_IEEE_P (mode)
11291 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11292 rs6000_passes_long_double = true;
11294 /* Note if we passed or return a IEEE 128-bit type. We changed the
11295 mangling for these types, and we may need to make an alias with
11296 the old mangling. */
11297 if (FLOAT128_IEEE_P (mode))
11298 rs6000_passes_ieee128 = true;
11300 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11301 rs6000_passes_vector = true;
11305 if (TARGET_ALTIVEC_ABI
11306 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11307 || (type && TREE_CODE (type) == VECTOR_TYPE
11308 && int_size_in_bytes (type) == 16)))
11310 bool stack = false;
11312 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11314 cum->vregno += n_elts;
11316 if (!TARGET_ALTIVEC)
11317 error ("cannot pass argument in vector register because"
11318 " altivec instructions are disabled, use %qs"
11319 " to enable them", "-maltivec");
11321 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11322 even if it is going to be passed in a vector register.
11323 Darwin does the same for variable-argument functions. */
11324 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11326 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11336 /* Vector parameters must be 16-byte aligned. In 32-bit
11337 mode this means we need to take into account the offset
11338 to the parameter save area. In 64-bit mode, they just
11339 have to start on an even word, since the parameter save
11340 area is 16-byte aligned. */
11342 align = -(rs6000_parm_offset () + cum->words) & 3;
11344 align = cum->words & 1;
11345 cum->words += align + rs6000_arg_size (mode, type);
11347 if (TARGET_DEBUG_ARG)
11349 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11350 cum->words, align);
11351 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11352 cum->nargs_prototype, cum->prototype,
11353 GET_MODE_NAME (mode));
11357 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11359 int size = int_size_in_bytes (type);
11360 /* Variable sized types have size == -1 and are
11361 treated as if consisting entirely of ints.
11362 Pad to 16 byte boundary if needed. */
11363 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11364 && (cum->words % 2) != 0)
11366 /* For varargs, we can just go up by the size of the struct. */
11368 cum->words += (size + 7) / 8;
11371 /* It is tempting to say int register count just goes up by
11372 sizeof(type)/8, but this is wrong in a case such as
11373 { int; double; int; } [powerpc alignment]. We have to
11374 grovel through the fields for these too. */
11375 cum->intoffset = 0;
11376 cum->floats_in_gpr = 0;
11377 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11378 rs6000_darwin64_record_arg_advance_flush (cum,
11379 size * BITS_PER_UNIT, 1);
11381 if (TARGET_DEBUG_ARG)
11383 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11384 cum->words, TYPE_ALIGN (type), size);
11386 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11387 cum->nargs_prototype, cum->prototype,
11388 GET_MODE_NAME (mode));
11391 else if (DEFAULT_ABI == ABI_V4)
11393 if (abi_v4_pass_in_fpr (mode, named))
11395 /* _Decimal128 must use an even/odd register pair. This assumes
11396 that the register number is odd when fregno is odd. */
11397 if (mode == TDmode && (cum->fregno % 2) == 1)
11400 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11401 <= FP_ARG_V4_MAX_REG)
11402 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11405 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11406 if (mode == DFmode || FLOAT128_IBM_P (mode)
11407 || mode == DDmode || mode == TDmode)
11408 cum->words += cum->words & 1;
11409 cum->words += rs6000_arg_size (mode, type);
11414 int n_words = rs6000_arg_size (mode, type);
11415 int gregno = cum->sysv_gregno;
11417 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11418 As does any other 2 word item such as complex int due to a
11419 historical mistake. */
11421 gregno += (1 - gregno) & 1;
11423 /* Multi-reg args are not split between registers and stack. */
11424 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11426 /* Long long is aligned on the stack. So are other 2 word
11427 items such as complex int due to a historical mistake. */
11429 cum->words += cum->words & 1;
11430 cum->words += n_words;
11433 /* Note: continuing to accumulate gregno past when we've started
11434 spilling to the stack indicates the fact that we've started
11435 spilling to the stack to expand_builtin_saveregs. */
11436 cum->sysv_gregno = gregno + n_words;
11439 if (TARGET_DEBUG_ARG)
11441 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11442 cum->words, cum->fregno);
11443 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11444 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11445 fprintf (stderr, "mode = %4s, named = %d\n",
11446 GET_MODE_NAME (mode), named);
11451 int n_words = rs6000_arg_size (mode, type);
11452 int start_words = cum->words;
11453 int align_words = rs6000_parm_start (mode, type, start_words);
11455 cum->words = align_words + n_words;
11457 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11459 /* _Decimal128 must be passed in an even/odd float register pair.
11460 This assumes that the register number is odd when fregno is
11462 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11464 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11467 if (TARGET_DEBUG_ARG)
11469 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11470 cum->words, cum->fregno);
11471 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11472 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11473 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11474 named, align_words - start_words, depth);
/* Implement TARGET_FUNCTION_ARG_ADVANCE: step the argument cursor CUM
   past one argument of the given MODE and TYPE.  Thin wrapper that
   unwraps the opaque cumulative_args_t handle and forwards to the
   rs6000_function_arg_advance_1 worker.
   NOTE(review): the worker's trailing depth argument is on a line not
   visible in this extract -- presumably 0; confirm against full source.  */
11480 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11481 const_tree type, bool named)
11483 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11487 /* A subroutine of rs6000_darwin64_record_arg.  Assign the bits of the
11488 structure between cum->intoffset and bitpos to integer registers.
   Appends (reg, byte-offset) EXPR_LIST entries to RVEC at index *K for
   the pending run of "plain" (non-FP, non-vector) record bits, and sets
   cum->use_stack when the run does not fit in the remaining GPRs.  */
11491 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11492 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11495 unsigned int regno;
11496 unsigned int startbit, endbit;
11497 int this_regno, intregs, intoffset;
/* intoffset == -1 means nothing has accumulated since the last flush.  */
11500 if (cum->intoffset == -1)
11503 intoffset = cum->intoffset;
11504 cum->intoffset = -1;
11506 /* If this is the trailing part of a word, try to only load that
11507 much into the register. Otherwise load the whole register. Note
11508 that in the latter case we may pick up unwanted bits. It's not a
11509 problem at the moment but may wish to revisit. */
11511 if (intoffset % BITS_PER_WORD != 0)
11513 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11514 if (!int_mode_for_size (bits, 0).exists (&mode))
11516 /* We couldn't find an appropriate mode, which happens,
11517 e.g., in packed structs when there are 3 bytes to load.
11518 Back intoffset back to the beginning of the word in this
11520 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
/* Count the whole words spanned between the rounded-down start of the
   pending run and the rounded-up end at BITPOS.  */
11527 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11528 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11529 intregs = (endbit - startbit) / BITS_PER_WORD;
11530 this_regno = cum->words + intoffset / BITS_PER_WORD;
/* Any overflow past the last GPR forces the whole argument to memory;
   record that so the caller can bail out.  */
11532 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11533 cum->use_stack = 1;
11535 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
/* From here on INTOFFSET is a byte offset into the argument block.  */
11539 intoffset /= BITS_PER_UNIT;
/* Emit one (reg, byte-offset) pair per word still to be covered.  */
11542 regno = GP_ARG_MIN_REG + this_regno;
11543 reg = gen_rtx_REG (mode, regno);
11545 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
/* Bump INTOFFSET to the next word boundary for the following iteration.  */
11548 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11552 while (intregs > 0);
11555 /* Recursive workhorse for the following.
   Walks every FIELD_DECL of TYPE (a record), routing each field either
   to an FPR, to a vector register, or into the pending integer run that
   rs6000_darwin64_record_arg_flush later turns into GPR entries.
   STARTBITPOS is the record's bit offset within the outermost argument;
   new EXPR_LIST entries are appended to RVEC at *K.  */
11558 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11559 HOST_WIDE_INT startbitpos, rtx rvec[],
11564 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11565 if (TREE_CODE (f) == FIELD_DECL)
11567 HOST_WIDE_INT bitpos = startbitpos;
11568 tree ftype = TREE_TYPE (f);
/* Skip erroneous fields so a broken input type does not crash us.  */
11570 if (ftype == error_mark_node)
11572 mode = TYPE_MODE (ftype);
11574 if (DECL_SIZE (f) != 0
11575 && tree_fits_uhwi_p (bit_position (f)))
11576 bitpos += int_bit_position (f);
11578 /* ??? FIXME: else assume zero offset. */
/* Nested records are flattened by recursing on each member.  */
11580 if (TREE_CODE (ftype) == RECORD_TYPE)
11581 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11582 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11584 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
/* Complex fields are handled as their component mode; each component
   occupies one FPR slot.  */
11588 case E_SCmode: mode = SFmode; break;
11589 case E_DCmode: mode = DFmode; break;
11590 case E_TCmode: mode = TFmode; break;
/* Flush any pending integer bits before switching register class.  */
11594 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11595 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11597 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11598 && (mode == TFmode || mode == TDmode))
11599 /* Long double or _Decimal128 split over regs and memory. */
11600 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11604 = gen_rtx_EXPR_LIST (VOIDmode,
11605 gen_rtx_REG (mode, cum->fregno++),
11606 GEN_INT (bitpos / BITS_PER_UNIT));
11607 if (FLOAT128_2REG_P (mode))
11610 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11612 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11614 = gen_rtx_EXPR_LIST (VOIDmode,
11615 gen_rtx_REG (mode, cum->vregno++),
11616 GEN_INT (bitpos / BITS_PER_UNIT));
/* Anything else starts (or extends) the pending integer run.  */
11618 else if (cum->intoffset == -1)
11619 cum->intoffset = bitpos;
11623 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11624 the register(s) to be used for each field and subfield of a struct
11625 being passed by value, along with the offset of where the
11626 register's value may be found in the block. FP fields go in FP
11627 register, vector fields go in vector registers, and everything
11628 else goes in int registers, packed as in memory.
11630 This code is also used for function return values. RETVAL indicates
11631 whether this is the case.
11633 Much of this is taken from the SPARC V9 port, which has a similar
11634 calling convention. */
11637 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11638 bool named, bool retval)
11640 rtx rvec[FIRST_PSEUDO_REGISTER];
11641 int k = 1, kbase = 1;
11642 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11643 /* This is a copy; modifications are not visible to our caller. */
11644 CUMULATIVE_ARGS copy_cum = *orig_cum;
/* Fixed: "&copy_cum" had been mangled into the mojibake "©_cum"
   (HTML-entity corruption of "&copy"); take the address of the local
   copy declared just above.  */
11645 CUMULATIVE_ARGS *cum = &copy_cum;
11647 /* Pad to 16 byte boundary if needed. */
11648 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11649 && (cum->words % 2) != 0)
/* Reset per-argument scan state before walking the record's fields.  */
11652 cum->intoffset = 0;
11653 cum->use_stack = 0;
11654 cum->named = named;
11656 /* Put entries into rvec[] for individual FP and vector fields, and
11657 for the chunks of memory that go in int regs. Note we start at
11658 element 1; 0 is reserved for an indication of using memory, and
11659 may or may not be filled in below. */
11660 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11661 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11663 /* If any part of the struct went on the stack put all of it there.
11664 This hack is because the generic code for
11665 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11666 parts of the struct are not at the beginning. */
11667 if (cum->use_stack)
11670 return NULL_RTX; /* doesn't go in registers at all */
11672 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11674 if (k > 1 || cum->use_stack)
11675 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11680 /* Determine where to place an argument in 64-bit mode with 32-bit ABI.
   Returns either a single REG (when the argument fits in one GPR) or a
   PARALLEL of SImode register halves -- with a leading NULL_RTX element
   when part of the argument spills to memory.  Returns before building
   anything when no GPRs remain.
   NOTE(review): the align_words parameter is declared on a line elided
   from this extract.  */
11683 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11688 rtx rvec[GP_ARG_NUM_REG + 1];
/* All GPRs are used up: the argument goes wholly to memory.  */
11690 if (align_words >= GP_ARG_NUM_REG)
11693 n_units = rs6000_arg_size (mode, type);
11695 /* Optimize the simple case where the arg fits in one gpr, except in
11696 the case of BLKmode due to assign_parms assuming that registers are
11697 BITS_PER_WORD wide. */
11699 || (n_units == 1 && mode != BLKmode))
11700 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11703 if (align_words + n_units > GP_ARG_NUM_REG)
11704 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11705 using a magic NULL_RTX component.
11706 This is not strictly correct. Only some of the arg belongs in
11707 memory, not all of it. However, the normal scheme using
11708 function_arg_partial_nregs can result in unusual subregs, eg.
11709 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11710 store the whole arg to memory is often more efficient than code
11711 to store pieces, and we know that space is available in the right
11712 place for the whole arg. */
11713 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
/* Describe the register portion one 4-byte SImode piece at a time.  */
11718 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11719 rtx off = GEN_INT (i++ * 4);
11720 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11722 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11724 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11727 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11728 but must also be copied into the parameter save area starting at
11729 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11730 to the GPRs and/or memory. Return the number of elements used.
   Three cases: (1) split across GPRs and stack -- word-sized pieces plus
   a leading NULL_RTX memory marker; (2) wholly in GPRs -- one REG entry;
   (3) wholly in memory -- just the NULL_RTX marker.  */
11733 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11734 int align_words, rtx *rvec)
11738 if (align_words < GP_ARG_NUM_REG)
11740 int n_words = rs6000_arg_size (mode, type);
11742 if (align_words + n_words > GP_ARG_NUM_REG
11744 || (TARGET_32BIT && TARGET_POWERPC64))
11746 /* If this is partially on the stack, then we only
11747 include the portion actually in registers here. */
11748 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11751 if (align_words + n_words > GP_ARG_NUM_REG)
11753 /* Not all of the arg fits in gprs. Say that it goes in memory
11754 too, using a magic NULL_RTX component. Also see comment in
11755 rs6000_mixed_function_arg for why the normal
11756 function_arg_partial_nregs scheme doesn't work in this case. */
11757 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
/* One word-sized (SImode/DImode) piece per remaining GPR.  */
11762 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11763 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11764 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11766 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11770 /* The whole arg fits in gprs. */
11771 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11772 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11777 /* It's entirely in memory. */
11778 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11784 /* RVEC is a vector of K components of an argument of mode MODE.
11785 Construct the final function_arg return value from it.
   Collapses the trivial single-element cases (pure memory -> NULL;
   a lone REG already in MODE -> that REG) and otherwise wraps the
   components in a PARALLEL.
   NOTE(review): the guard distinguishing the single-element cases sits
   on lines elided from this extract -- confirm against full source.  */
11788 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11790 gcc_assert (k >= 1);
11792 /* Avoid returning a PARALLEL in the trivial cases. */
11795 if (XEXP (rvec[0], 0) == NULL_RTX)
11798 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11799 return XEXP (rvec[0], 0);
11802 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11805 /* Determine where to put an argument to a function.
11806 Value is zero to push the argument on the stack,
11807 or a hard register in which to store the argument.
11809 MODE is the argument's machine mode.
11810 TYPE is the data type of the argument (as a tree).
11811 This is null for libcalls where that information may
11813 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11814 the preceding args and about the function being called. It is
11815 not modified in this routine.
11816 NAMED is nonzero if this argument is a named parameter
11817 (otherwise it is an extra parameter matching an ellipsis).
11819 On RS/6000 the first eight words of non-FP are normally in registers
11820 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11821 Under V.4, the first 8 FP args are in registers.
11823 If this is floating-point and no prototype is specified, we use
11824 both an FP and integer register (or possibly FP reg and stack). Library
11825 functions (when CALL_LIBCALL is set) always have the proper types for args,
11826 so we can pass the FP value just in one register. emit_library_function
11827 doesn't support PARALLEL anyway.
11829 Note that for args passed by reference, function_arg will be called
11830 with MODE and TYPE set to that of the pointer to the arg, not the arg
11834 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11835 const_tree type, bool named)
11837 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11838 enum rs6000_abi abi = DEFAULT_ABI;
11839 machine_mode elt_mode;
11842 /* Return a marker to indicate whether CR1 needs to set or clear the
11843 bit that V.4 uses to say fp args were passed in registers.
11844 Assume that we don't need the marker for software floating point,
11845 or compiler generated library calls. */
11846 if (mode == VOIDmode)
11849 && (cum->call_cookie & CALL_LIBCALL) == 0
11851 || (cum->nargs_prototype < 0
11852 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11853 && TARGET_HARD_FLOAT)
11854 return GEN_INT (cum->call_cookie
11855 | ((cum->fregno == FP_ARG_MIN_REG)
11856 ? CALL_V4_SET_FP_ARGS
11857 : CALL_V4_CLEAR_FP_ARGS));
11859 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
/* ELFv2 homogeneous aggregates are decomposed into n_elts copies of
   elt_mode; for anything else elt_mode is just MODE.  */
11862 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
/* Darwin64 structs get the special per-field PARALLEL treatment.  */
11864 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11866 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11867 if (rslt != NULL_RTX)
11869 /* Else fall through to usual handling. */
/* Case 1: the argument goes in AltiVec/VSX vector registers.  */
11872 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11874 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11878 /* Do we also need to pass this argument in the parameter save area?
11879 Library support functions for IEEE 128-bit are assumed to not need the
11880 value passed both in GPRs and in vector registers. */
11881 if (TARGET_64BIT && !cum->prototype
11882 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11884 int align_words = ROUND_UP (cum->words, 2);
11885 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11888 /* Describe where this argument goes in the vector registers. */
11889 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11891 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11892 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11893 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11896 return rs6000_finish_function_arg (mode, rvec, k);
/* Case 2: a 16-byte vector under the AltiVec ABI.  */
11898 else if (TARGET_ALTIVEC_ABI
11899 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11900 || (type && TREE_CODE (type) == VECTOR_TYPE
11901 && int_size_in_bytes (type) == 16)))
11903 if (named || abi == ABI_V4)
11907 /* Vector parameters to varargs functions under AIX or Darwin
11908 get passed in memory and possibly also in GPRs. */
11909 int align, align_words, n_words;
11910 machine_mode part_mode;
11912 /* Vector parameters must be 16-byte aligned. In 32-bit
11913 mode this means we need to take into account the offset
11914 to the parameter save area. In 64-bit mode, they just
11915 have to start on an even word, since the parameter save
11916 area is 16-byte aligned. */
11918 align = -(rs6000_parm_offset () + cum->words) & 3;
11920 align = cum->words & 1;
11921 align_words = cum->words + align;
11923 /* Out of registers? Memory, then. */
11924 if (align_words >= GP_ARG_NUM_REG)
11927 if (TARGET_32BIT && TARGET_POWERPC64)
11928 return rs6000_mixed_function_arg (mode, type, align_words);
11930 /* The vector value goes in GPRs. Only the part of the
11931 value in GPRs is reported here. */
11933 n_words = rs6000_arg_size (mode, type);
11934 if (align_words + n_words > GP_ARG_NUM_REG)
11935 /* Fortunately, there are only two possibilities, the value
11936 is either wholly in GPRs or half in GPRs and half not. */
11937 part_mode = DImode;
11939 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
/* Case 3: System V.4 ABI -- FPRs for FP scalars, GPR pairs otherwise.  */
11943 else if (abi == ABI_V4)
11945 if (abi_v4_pass_in_fpr (mode, named))
11947 /* _Decimal128 must use an even/odd register pair. This assumes
11948 that the register number is odd when fregno is odd. */
11949 if (mode == TDmode && (cum->fregno % 2) == 1)
11952 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11953 <= FP_ARG_V4_MAX_REG)
11954 return gen_rtx_REG (mode, cum->fregno);
11960 int n_words = rs6000_arg_size (mode, type);
11961 int gregno = cum->sysv_gregno;
11963 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11964 As does any other 2 word item such as complex int due to a
11965 historical mistake. */
11967 gregno += (1 - gregno) & 1;
11969 /* Multi-reg args are not split between registers and stack. */
11970 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11973 if (TARGET_32BIT && TARGET_POWERPC64)
11974 return rs6000_mixed_function_arg (mode, type,
11975 gregno - GP_ARG_MIN_REG);
11976 return gen_rtx_REG (mode, gregno);
/* Case 4: AIX / ELFv1 / ELFv2 default path.  */
11981 int align_words = rs6000_parm_start (mode, type, cum->words);
11983 /* _Decimal128 must be passed in an even/odd float register pair.
11984 This assumes that the register number is odd when fregno is odd. */
11985 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11988 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11989 && !(TARGET_AIX && !TARGET_ELF
11990 && type != NULL && AGGREGATE_TYPE_P (type)))
11992 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11995 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11998 /* Do we also need to pass this argument in the parameter
12000 if (type && (cum->nargs_prototype <= 0
12001 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12002 && TARGET_XL_COMPAT
12003 && align_words >= GP_ARG_NUM_REG)))
12004 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12006 /* Describe where this argument goes in the fprs. */
12007 for (i = 0; i < n_elts
12008 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12010 /* Check if the argument is split over registers and memory.
12011 This can only ever happen for long double or _Decimal128;
12012 complex types are handled via split_complex_arg. */
12013 machine_mode fmode = elt_mode;
12014 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12016 gcc_assert (FLOAT128_2REG_P (fmode));
12017 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12020 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12021 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12022 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12025 /* If there were not enough FPRs to hold the argument, the rest
12026 usually goes into memory. However, if the current position
12027 is still within the register parameter area, a portion may
12028 actually have to go into GPRs.
12030 Note that it may happen that the portion of the argument
12031 passed in the first "half" of the first GPR was already
12032 passed in the last FPR as well.
12034 For unnamed arguments, we already set up GPRs to cover the
12035 whole argument in rs6000_psave_function_arg, so there is
12036 nothing further to do at this point. */
12037 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12038 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12039 && cum->nargs_prototype > 0)
12041 static bool warned;
12043 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12044 int n_words = rs6000_arg_size (mode, type);
12046 align_words += fpr_words;
12047 n_words -= fpr_words;
12051 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12052 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12053 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12055 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
/* One-shot -Wpsabi diagnostic for the GCC 5 ABI change.  */
12057 if (!warned && warn_psabi)
12060 inform (input_location,
12061 "the ABI of passing homogeneous float aggregates"
12062 " has changed in GCC 5");
12066 return rs6000_finish_function_arg (mode, rvec, k);
12068 else if (align_words < GP_ARG_NUM_REG)
12070 if (TARGET_32BIT && TARGET_POWERPC64)
12071 return rs6000_mixed_function_arg (mode, type, align_words);
12073 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12080 /* For an arg passed partly in registers and partly in memory, this is
12081 the number of bytes passed in registers. For args passed entirely in
12082 registers or entirely in memory, zero. When an arg is described by a
12083 PARALLEL, perhaps using more than one register type, this function
12084 returns the number of bytes used by the first element of the PARALLEL.
   Implements TARGET_ARG_PARTIAL_BYTES.  Must stay consistent with the
   register assignment performed by rs6000_function_arg above.  */
12087 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12088 tree type, bool named)
12090 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12091 bool passed_in_gprs = true;
12094 machine_mode elt_mode;
12097 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
/* V.4 never splits an argument between registers and memory.  */
12099 if (DEFAULT_ABI == ABI_V4)
12102 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12104 /* If we are passing this arg in the fixed parameter save area (gprs or
12105 memory) as well as VRs, we do not use the partial bytes mechanism;
12106 instead, rs6000_function_arg will return a PARALLEL including a memory
12107 element as necessary. Library support functions for IEEE 128-bit are
12108 assumed to not need the value passed both in GPRs and in vector
12110 if (TARGET_64BIT && !cum->prototype
12111 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12114 /* Otherwise, we pass in VRs only. Check for partial copies. */
12115 passed_in_gprs = false;
12116 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12117 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12120 /* In this complicated case we just disable the partial_nregs code. */
12121 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12124 align_words = rs6000_parm_start (mode, type, cum->words);
12126 if (USE_FP_FOR_ARG_P (cum, elt_mode)
12127 && !(TARGET_AIX && !TARGET_ELF
12128 && type != NULL && AGGREGATE_TYPE_P (type)))
12130 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12132 /* If we are passing this arg in the fixed parameter save area
12133 (gprs or memory) as well as FPRs, we do not use the partial
12134 bytes mechanism; instead, rs6000_function_arg will return a
12135 PARALLEL including a memory element as necessary. */
12137 && (cum->nargs_prototype <= 0
12138 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12139 && TARGET_XL_COMPAT
12140 && align_words >= GP_ARG_NUM_REG)))
12143 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12144 passed_in_gprs = false;
12145 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12147 /* Compute number of bytes / words passed in FPRs. If there
12148 is still space available in the register parameter area
12149 *after* that amount, a part of the argument will be passed
12150 in GPRs. In that case, the total amount passed in any
12151 registers is equal to the amount that would have been passed
12152 in GPRs if everything were passed there, so we fall back to
12153 the GPR code below to compute the appropriate value. */
12154 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12155 * MIN (8, GET_MODE_SIZE (elt_mode)));
12156 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12158 if (align_words + fpr_words < GP_ARG_NUM_REG)
12159 passed_in_gprs = true;
/* GPR case: bytes in registers = remaining GPR words * word size.  */
12166 && align_words < GP_ARG_NUM_REG
12167 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12168 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12170 if (ret != 0 && TARGET_DEBUG_ARG)
12171 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12176 /* A C expression that indicates when an argument must be passed by
12177 reference. If nonzero for an argument, a copy of that argument is
12178 made in memory and a pointer to the argument is passed instead of
12179 the argument itself. The pointer is passed in whatever way is
12180 appropriate for passing a pointer to that type.
12182 Under V.4, aggregates and long double are passed by reference.
12184 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12185 reference unless the AltiVec vector extension ABI is in force.
12187 As an extension to all ABIs, variable sized types are passed by
   reference.  Implements TARGET_PASS_BY_REFERENCE; each positive case
   emits a -mdebug=arg trace before returning.  */
12191 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12192 machine_mode mode, const_tree type,
12193 bool named ATTRIBUTE_UNUSED)
/* V.4: IEEE 128-bit long double is passed by reference.  */
12198 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12199 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12201 if (TARGET_DEBUG_ARG)
12202 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
/* V.4: all aggregates are passed by reference.  */
12206 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12208 if (TARGET_DEBUG_ARG)
12209 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
/* Variable-sized types (negative size) always go by reference.  */
12213 if (int_size_in_bytes (type) < 0)
12215 if (TARGET_DEBUG_ARG)
12216 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12220 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12221 modes only exist for GCC vector types if -maltivec. */
12222 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12224 if (TARGET_DEBUG_ARG)
12225 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12229 /* Pass synthetic vectors in memory. */
12230 if (TREE_CODE (type) == VECTOR_TYPE
12231 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12233 static bool warned_for_pass_big_vectors = false;
12234 if (TARGET_DEBUG_ARG)
12235 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
/* Warn once per compilation: this extension has no fixed ABI.  */
12236 if (!warned_for_pass_big_vectors)
12238 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12239 "non-standard ABI extension with no compatibility "
12241 warned_for_pass_big_vectors = true;
12249 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12250 already processed. Return true if the parameter must be passed
12251 (fully or partially) on the stack.  Also advances ARGS_SO_FAR past
   the parameter as a side effect, so successive calls walk the whole
   parameter list.  */
12254 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12260 /* Catch errors. */
12261 if (type == NULL || type == error_mark_node)
12264 /* Handle types with no storage requirement. */
12265 if (TYPE_MODE (type) == VOIDmode)
12268 /* Handle complex types. */
/* Recurse once per component; real and imaginary parts share
   TREE_TYPE, so the two calls are intentionally identical -- each one
   also advances ARGS_SO_FAR past one component.  */
12269 if (TREE_CODE (type) == COMPLEX_TYPE)
12270 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12271 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12273 /* Handle transparent aggregates. */
12274 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12275 && TYPE_TRANSPARENT_AGGR (type))
12276 type = TREE_TYPE (first_field (type));
12278 /* See if this arg was passed by invisible reference. */
12279 if (pass_by_reference (get_cumulative_args (args_so_far),
12280 TYPE_MODE (type), type, true))
12281 type = build_pointer_type (type);
12283 /* Find mode as it is passed by the ABI. */
12284 unsignedp = TYPE_UNSIGNED (type);
12285 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12287 /* If we must pass in stack, we need a stack. */
12288 if (rs6000_must_pass_in_stack (mode, type))
12291 /* If there is no incoming register, we need a stack. */
12292 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12293 if (entry_parm == NULL)
12296 /* Likewise if we need to pass both in registers and on the stack. */
12297 if (GET_CODE (entry_parm) == PARALLEL
12298 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12301 /* Also true if we're partially in registers and partially not. */
12302 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12305 /* Update info on where next arg arrives in registers. */
12306 rs6000_function_arg_advance (args_so_far, mode, type, true);
12310 /* Return true if FUN has no prototype, has a variable argument
12311 list, or passes any parameter in memory.  INCOMING selects whether
   FUN is the current function's decl (use DECL_ARGUMENTS, needed for
   K&R-style definitions) or a callee's type (use FOREACH_FUNCTION_ARGS).
   A hidden aggregate-return pointer is accounted for before the
   explicit parameters.  */
12314 rs6000_function_parms_need_stack (tree fun, bool incoming)
12316 tree fntype, result;
12317 CUMULATIVE_ARGS args_so_far_v;
12318 cumulative_args_t args_so_far;
12321 /* Must be a libcall, all of which only use reg parms. */
12326 fntype = TREE_TYPE (fun);
12328 /* Varargs functions need the parameter save area. */
12329 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12332 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12333 args_so_far = pack_cumulative_args (&args_so_far_v);
12335 /* When incoming, we will have been passed the function decl.
12336 It is necessary to use the decl to handle K&R style functions,
12337 where TYPE_ARG_TYPES may not be available. */
12340 gcc_assert (DECL_P (fun));
12341 result = DECL_RESULT (fun);
12344 result = TREE_TYPE (fntype);
/* An aggregate return value arrives as a hidden pointer argument;
   advance args_so_far past it before looking at the real parameters.  */
12346 if (result && aggregate_value_p (result, fntype))
12348 if (!TYPE_P (result))
12349 result = TREE_TYPE (result);
12350 result = build_pointer_type (result);
12351 rs6000_parm_needs_stack (args_so_far, result);
/* Incoming path: walk the decl's DECL_ARGUMENTS chain.  */
12358 for (parm = DECL_ARGUMENTS (fun);
12359 parm && parm != void_list_node;
12360 parm = TREE_CHAIN (parm))
12361 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
/* Outgoing path: walk the prototype's argument types.  */
12366 function_args_iterator args_iter;
12369 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12370 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12377 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12378 usually a constant depending on the ABI. However, in the ELFv2 ABI
12379 the register parameter area is optional when calling a function that
12380 has a prototype in scope, has no variable argument list, and passes
12381 all parameters in registers.  */
12384 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12386 int reg_parm_stack_space;
12388 switch (DEFAULT_ABI)
/* V.4-style ABIs have no register parameter save area.  */
12391 reg_parm_stack_space = 0;
/* AIX/ELFv1: always 8 words (64 or 32 bytes).  */
12396 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12400 /* ??? Recomputing this every time is a bit expensive. Is there
12401 a place to cache this information? */
/* ELFv2: only needed when some parameter touches the stack.  */
12402 if (rs6000_function_parms_need_stack (fun, incoming))
12403 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12405 reg_parm_stack_space = 0;
12409 return reg_parm_stack_space;
/* Emit moves storing NREGS consecutive word-sized hard registers,
   starting at REGNO, into successive words of the memory block X.
   After reload the address of each word must be strictly valid, so
   invalid addresses are rewritten via a subreg of X before the store.  */
12413 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12416 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12421 for (i = 0; i < nregs; i++)
12423 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12424 if (reload_completed)
/* Post-reload we may not create new pseudos; fall back to a subreg
   of the block when the offset address is not strictly valid.  */
12426 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12429 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12430 i * GET_MODE_SIZE (reg_mode));
12433 tem = replace_equiv_address (tem, XEXP (tem, 0));
12437 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12441 /* Perform any needed actions needed for a function that is receiving a
12442 variable number of arguments.
12446 MODE and TYPE are the mode and type of the current parameter.
12448 PRETEND_SIZE is a variable that should be set to the amount of stack
12449 that must be pushed by the prolog to pretend that our caller pushed
12452 Normally, this macro will push all remaining incoming registers on the
12453 stack and set PRETEND_SIZE to the length of the registers pushed.
   Implements TARGET_SETUP_INCOMING_VARARGS.  On V.4 a dedicated
   register save area is carved out of the local frame; elsewhere the
   anonymous GPRs are spilled directly after the named arguments in the
   caller's parameter area.  */
12456 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12457 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12460 CUMULATIVE_ARGS next_cum;
12461 int reg_size = TARGET_32BIT ? 4 : 8;
12462 rtx save_area = NULL_RTX, mem;
12463 int first_reg_offset;
12464 alias_set_type set;
12466 /* Skip the last named argument. */
12467 next_cum = *get_cumulative_args (cum);
12468 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12470 if (DEFAULT_ABI == ABI_V4)
12472 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12476 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12477 HOST_WIDE_INT offset = 0;
12479 /* Try to optimize the size of the varargs save area.
12480 The ABI requires that ap.reg_save_area is doubleword
12481 aligned, but we don't need to allocate space for all
12482 the bytes, only those to which we actually will save
12484 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12485 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12486 if (TARGET_HARD_FLOAT
12487 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12488 && cfun->va_list_fpr_size)
/* fpr_size covers the FPR part of the save area: the slots already
   consumed by named FP args plus those va_arg may still read.  */
12491 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12492 * UNITS_PER_FP_WORD;
12493 if (cfun->va_list_fpr_size
12494 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12495 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12497 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12498 * UNITS_PER_FP_WORD;
/* Double-word align the block while skipping registers we never save.  */
12502 offset = -((first_reg_offset * reg_size) & ~7);
12503 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12505 gpr_reg_num = cfun->va_list_gpr_size;
12506 if (reg_size == 4 && (first_reg_offset & 1))
12509 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12512 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12513 * UNITS_PER_FP_WORD
12514 - (int) (GP_ARG_NUM_REG * reg_size);
12516 if (gpr_size + fpr_size)
12519 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12520 gcc_assert (MEM_P (reg_save_area));
12521 reg_save_area = XEXP (reg_save_area, 0);
/* Normalize the slot address to an offset from the virtual frame
   pointer so it can be reconstructed in rs6000_va_start.  */
12522 if (GET_CODE (reg_save_area) == PLUS)
12524 gcc_assert (XEXP (reg_save_area, 0)
12525 == virtual_stack_vars_rtx);
12526 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12527 offset += INTVAL (XEXP (reg_save_area, 1));
12530 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12533 cfun->machine->varargs_save_offset = offset;
12534 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
/* Non-V.4 ABIs: spill into the caller-provided parameter save area.  */
12539 first_reg_offset = next_cum.words;
12540 save_area = crtl->args.internal_arg_pointer;
12542 if (targetm.calls.must_pass_in_stack (mode, type))
12543 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12546 set = get_varargs_alias_set ();
12547 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12548 && cfun->va_list_gpr_size)
12550 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12552 if (va_list_gpr_counter_field)
12553 /* V4 va_list_gpr_size counts number of registers needed. */
12554 n_gpr = cfun->va_list_gpr_size;
12556 /* char * va_list instead counts number of bytes needed. */
12557 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12562 mem = gen_rtx_MEM (BLKmode,
12563 plus_constant (Pmode, save_area,
12564 first_reg_offset * reg_size));
12565 MEM_NOTRAP_P (mem) = 1;
12566 set_mem_alias_set (mem, set);
12567 set_mem_align (mem, BITS_PER_WORD);
12569 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12573 /* Save FP registers if needed. */
12574 if (DEFAULT_ABI == ABI_V4
12575 && TARGET_HARD_FLOAT
12577 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12578 && cfun->va_list_fpr_size)
12580 int fregno = next_cum.fregno, nregs;
12581 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12582 rtx lab = gen_label_rtx ();
12583 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12584 * UNITS_PER_FP_WORD);
/* Skip the FPR stores when the caller's CR1 marker says no FP args
   were passed in registers (see CALL_V4_SET_FP_ARGS handling).  */
12587 (gen_rtx_SET (pc_rtx,
12588 gen_rtx_IF_THEN_ELSE (VOIDmode,
12589 gen_rtx_NE (VOIDmode, cr1,
12591 gen_rtx_LABEL_REF (VOIDmode, lab),
12595 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12596 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12598 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12599 plus_constant (Pmode, save_area, off));
12600 MEM_NOTRAP_P (mem) = 1;
12601 set_mem_alias_set (mem, set);
12602 set_mem_align (mem, GET_MODE_ALIGNMENT (
12603 TARGET_HARD_FLOAT ? DFmode : SFmode));
12604 emit_move_insn (mem, gen_rtx_REG (
12605 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
/* Build the target va_list type.  Non-V4 ABIs use plain 'char *'; the
   SVR4 (V4) ABI builds a RECORD_TYPE with fields gpr, fpr, reserved,
   overflow_arg_area and reg_save_area, returned as a one-element array.
   NOTE(review): this extract is line-sampled -- braces and some argument
   lines (e.g. the field types of f_ovf/f_sav) are missing, and the leading
   integers on each line are extraction artifacts, not code.  */
12612 /* Create the va_list data type. */
12615 rs6000_build_builtin_va_list (void)
12617 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12619 /* For AIX, prefer 'char *' because that's what the system
12620 header files like. */
12621 if (DEFAULT_ABI != ABI_V4)
12622 return build_pointer_type (char_type_node);
12624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12625 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12626 get_identifier ("__va_list_tag"), record);
/* gpr/fpr are single-byte counters of argument registers consumed.  */
12628 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12629 unsigned_char_type_node);
12630 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12631 unsigned_char_type_node);
12632 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12633 every user file. */
12634 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12635 get_identifier ("reserved"), short_unsigned_type_node);
12636 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12637 get_identifier ("overflow_arg_area"),
12639 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12640 get_identifier ("reg_save_area"),
/* Remember the counter fields so the middle end can track how much of
   the save area a function's va_arg uses (cfun->va_list_*_size).  */
12643 va_list_gpr_counter_field = f_gpr;
12644 va_list_fpr_counter_field = f_fpr;
12646 DECL_FIELD_CONTEXT (f_gpr) = record;
12647 DECL_FIELD_CONTEXT (f_fpr) = record;
12648 DECL_FIELD_CONTEXT (f_res) = record;
12649 DECL_FIELD_CONTEXT (f_ovf) = record;
12650 DECL_FIELD_CONTEXT (f_sav) = record;
12652 TYPE_STUB_DECL (record) = type_decl;
12653 TYPE_NAME (record) = type_decl;
/* Chain the fields in declaration order: gpr, fpr, reserved, ovf, sav.  */
12654 TYPE_FIELDS (record) = f_gpr;
12655 DECL_CHAIN (f_gpr) = f_fpr;
12656 DECL_CHAIN (f_fpr) = f_res;
12657 DECL_CHAIN (f_res) = f_ovf;
12658 DECL_CHAIN (f_ovf) = f_sav;
12660 layout_type (record);
12662 /* The correct type is an array type of one element. */
12663 return build_array_type (record, build_index_type (size_zero_node));
/* Implement va_start for the SVR4 ABI: fill in the gpr/fpr counters, the
   overflow_arg_area pointer and the reg_save_area pointer of the va_list
   record.  All other ABIs defer to std_expand_builtin_va_start.
   NOTE(review): line-sampled extract -- braces and some continuation
   lines are missing; leading integers are extraction artifacts.  */
12666 /* Implement va_start. */
12669 rs6000_va_start (tree valist, rtx nextarg)
12671 HOST_WIDE_INT words, n_gpr, n_fpr;
12672 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12673 tree gpr, fpr, ovf, sav, t;
12675 /* Only SVR4 needs something special. */
12676 if (DEFAULT_ABI != ABI_V4)
12678 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain built by rs6000_build_builtin_va_list.  */
12682 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12683 f_fpr = DECL_CHAIN (f_gpr);
12684 f_res = DECL_CHAIN (f_fpr);
12685 f_ovf = DECL_CHAIN (f_res);
12686 f_sav = DECL_CHAIN (f_ovf);
12688 valist = build_simple_mem_ref (valist);
12689 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12690 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12692 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12694 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12697 /* Count number of gp and fp argument registers used. */
12698 words = crtl->args.info.words;
12699 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12701 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12704 if (TARGET_DEBUG_ARG)
12705 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12706 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12707 words, n_gpr, n_fpr);
/* Store the register counters only if va_arg actually reads them.  */
12709 if (cfun->va_list_gpr_size)
12711 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12712 build_int_cst (NULL_TREE, n_gpr));
12713 TREE_SIDE_EFFECTS (t) = 1;
12714 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12717 if (cfun->va_list_fpr_size)
12719 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12720 build_int_cst (NULL_TREE, n_fpr));
12721 TREE_SIDE_EFFECTS (t) = 1;
12722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* Record FP-argument use for .gnu_attribute emission when the assembler
   supports it.  */
12724 #ifdef HAVE_AS_GNU_ATTRIBUTE
12725 if (call_ABI_of_interest (cfun->decl))
12726 rs6000_passes_float = true;
12730 /* Find the overflow area. */
12731 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12733 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12734 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12735 TREE_SIDE_EFFECTS (t) = 1;
12736 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12738 /* If there were no va_arg invocations, don't set up the register
12740 if (!cfun->va_list_gpr_size
12741 && !cfun->va_list_fpr_size
12742 && n_gpr < GP_ARG_NUM_REG
12743 && n_fpr < FP_ARG_V4_MAX_REG)
12746 /* Find the register save area. */
12747 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx)
12748 if (cfun->machine->varargs_save_offset)
12749 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12750 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12751 TREE_SIDE_EFFECTS (t) = 1;
12752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* Implement va_arg (TARGET_GIMPLIFY_VA_ARG_EXPR).  Handles, in order:
   pass-by-reference types (recursive indirection); zero-sized types under
   the Darwin64/ELFv2/AIX alignment quirk; non-V4 ABIs (std_gimplify or
   split complex args); and the full SVR4 path that pulls the value from
   the FP/GP register save area or the overflow area.
   NOTE(review): line-sampled extract -- braces, 'else' lines and several
   statements (e.g. the 'reg'/'align'/'regalign' assignments) are missing,
   and the leading integers are extraction artifacts.  */
12755 /* Implement va_arg. */
12758 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12759 gimple_seq *post_p)
12761 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12762 tree gpr, fpr, ovf, sav, reg, t, u;
12763 int size, rsize, n_reg, sav_ofs, sav_scale;
12764 tree lab_false, lab_over, addr;
12766 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
/* Aggregates passed by reference: fetch the pointer, then dereference.  */
12770 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12772 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12773 return build_va_arg_indirect_ref (t);
12776 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12777 earlier version of gcc, with the property that it always applied alignment
12778 adjustments to the va-args (even for zero-sized types). The cheapest way
12779 to deal with this is to replicate the effect of the part of
12780 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12782 We don't need to check for pass-by-reference because of the test above.
12783 We can return a simplifed answer, since we know there's no offset to add. */
12786 && rs6000_darwin64_abi)
12787 || DEFAULT_ABI == ABI_ELFv2
12788 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12789 && integer_zerop (TYPE_SIZE (type)))
12791 unsigned HOST_WIDE_INT align, boundary;
12792 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12793 align = PARM_BOUNDARY / BITS_PER_UNIT;
12794 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12795 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12796 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12797 boundary /= BITS_PER_UNIT;
12798 if (boundary > align)
12801 /* This updates arg ptr by the amount that would be necessary
12802 to align the zero-sized (but not zero-alignment) item. */
12803 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12804 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12805 gimplify_and_add (t, pre_p);
/* Round the bumped pointer down to a multiple of 'boundary'.  */
12807 t = fold_convert (sizetype, valist_tmp);
12808 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12809 fold_convert (TREE_TYPE (valist),
12810 fold_build2 (BIT_AND_EXPR, sizetype, t,
12811 size_int (-boundary))));
12812 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12813 gimplify_and_add (t, pre_p);
12815 /* Since it is zero-sized there's no increment for the item itself. */
12816 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12817 return build_va_arg_indirect_ref (valist_tmp);
12820 if (DEFAULT_ABI != ABI_V4)
12822 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12824 tree elem_type = TREE_TYPE (type);
12825 machine_mode elem_mode = TYPE_MODE (elem_type);
12826 int elem_size = GET_MODE_SIZE (elem_mode);
12828 if (elem_size < UNITS_PER_WORD)
12830 tree real_part, imag_part;
12831 gimple_seq post = NULL;
12833 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12835 /* Copy the value into a temporary, lest the formal temporary
12836 be reused out from under us. */
12837 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12838 gimple_seq_add_seq (pre_p, post);
12840 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12843 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12847 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* SVR4 path: locate the va_list record fields.  */
12850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12851 f_fpr = DECL_CHAIN (f_gpr);
12852 f_res = DECL_CHAIN (f_fpr);
12853 f_ovf = DECL_CHAIN (f_res);
12854 f_sav = DECL_CHAIN (f_ovf);
12856 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12857 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12859 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12861 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
/* rsize is the size in 4-byte words; pad is the big-endian right-align
   slack inside the last word group.  */
12864 size = int_size_in_bytes (type);
12865 rsize = (size + 3) / 4;
12866 int pad = 4 * rsize - size;
12869 machine_mode mode = TYPE_MODE (type);
12870 if (abi_v4_pass_in_fpr (mode, false))
12872 /* FP args go in FP registers, if present. */
12874 n_reg = (size + 7) / 8;
12875 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12876 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12877 if (mode != SFmode && mode != SDmode)
12882 /* Otherwise into GP registers. */
12891 /* Pull the value out of the saved registers.... */
12894 addr = create_tmp_var (ptr_type_node, "addr");
12896 /* AltiVec vectors never go in registers when -mabi=altivec. */
12897 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12901 lab_false = create_artificial_label (input_location);
12902 lab_over = create_artificial_label (input_location);
12904 /* Long long is aligned in the registers. As are any other 2 gpr
12905 item such as complex int due to a historical mistake. */
12907 if (n_reg == 2 && reg == gpr)
/* Round the counter up to an even register by adding (reg & 1).  */
12910 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12911 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12912 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12913 unshare_expr (reg), u);
12915 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12916 reg number is 0 for f1, so we want to make it odd. */
12917 else if (reg == fpr && mode == TDmode)
12919 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12920 build_int_cst (TREE_TYPE (reg), 1));
12921 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
/* If fewer than n_reg registers remain, jump to the overflow path.  */
12924 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12925 t = build2 (GE_EXPR, boolean_type_node, u, t);
12926 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12927 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12928 gimplify_and_add (t, pre_p);
/* addr = sav + sav_ofs + reg++ * sav_scale.  */
12932 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12934 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12935 build_int_cst (TREE_TYPE (reg), n_reg));
12936 u = fold_convert (sizetype, u);
12937 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12938 t = fold_build_pointer_plus (t, u);
12940 /* _Decimal32 varargs are located in the second word of the 64-bit
12941 FP register for 32-bit binaries. */
12942 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12943 t = fold_build_pointer_plus_hwi (t, size);
12945 /* Args are passed right-aligned. */
12946 if (BYTES_BIG_ENDIAN)
12947 t = fold_build_pointer_plus_hwi (t, pad);
12949 gimplify_assign (addr, t, pre_p);
12951 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12953 stmt = gimple_build_label (lab_false);
12954 gimple_seq_add_stmt (pre_p, stmt);
12956 if ((n_reg == 2 && !regalign) || n_reg > 2)
12958 /* Ensure that we don't find any more args in regs.
12959 Alignment has taken care of for special cases. */
12960 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12964 /* ... otherwise out of the overflow area. */
12966 /* Care for on-stack alignment if needed. */
12970 t = fold_build_pointer_plus_hwi (t, align - 1);
12971 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12972 build_int_cst (TREE_TYPE (t), -align));
12975 /* Args are passed right-aligned. */
12976 if (BYTES_BIG_ENDIAN)
12977 t = fold_build_pointer_plus_hwi (t, pad);
12979 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12981 gimplify_assign (unshare_expr (addr), t, pre_p);
/* Advance the overflow pointer past the consumed argument.  */
12983 t = fold_build_pointer_plus_hwi (t, size);
12984 gimplify_assign (unshare_expr (ovf), t, pre_p);
12988 stmt = gimple_build_label (lab_over);
12989 gimple_seq_add_stmt (pre_p, stmt);
12992 if (STRICT_ALIGNMENT
12993 && (TYPE_ALIGN (type)
12994 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12996 /* The value (of type complex double, for example) may not be
12997 aligned in memory in the saved registers, so copy via a
12998 temporary. (This is the same code as used for SPARC.) */
12999 tree tmp = create_tmp_var (type, "va_arg_tmp");
13000 tree dest_addr = build_fold_addr_expr (tmp);
13002 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13003 3, dest_addr, addr, size_int (rsize * 4));
13004 TREE_ADDRESSABLE (tmp) = 1;
13006 gimplify_and_add (copy, pre_p);
13010 addr = fold_convert (ptrtype, addr);
13011 return build_va_arg_indirect_ref (addr);
/* Register the built-in function NAME of type TYPE under machine-specific
   code CODE and record the decl in rs6000_builtin_decls.  Attribute bits
   from rs6000_builtin_info[code].attr (const / pure / fp) are translated
   into the corresponding tree flags on the new decl.  */
13017 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13020 unsigned classify = rs6000_builtin_info[(int)code].attr;
13021 const char *attr_string = "";
13023 gcc_assert (name != NULL);
/* NOTE(review): IN_RANGE's inclusive upper bound admits
   code == RS6000_BUILTIN_COUNT, one past the last valid table index;
   confirm whether RS6000_BUILTIN_COUNT - 1 was intended.  */
13024 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
/* Each builtin code may be registered only once.  */
13026 if (rs6000_builtin_decls[(int)code])
13027 fatal_error (input_location,
13028 "internal error: builtin function %qs already processed",
13031 rs6000_builtin_decls[(int)code] = t =
13032 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13034 /* Set any special attributes. */
13035 if ((classify & RS6000_BTC_CONST) != 0)
13037 /* const function, function only depends on the inputs. */
13038 TREE_READONLY (t) = 1;
13039 TREE_NOTHROW (t) = 1;
13040 attr_string = ", const";
13042 else if ((classify & RS6000_BTC_PURE) != 0)
13044 /* pure function, function can read global memory, but does not set any
13046 DECL_PURE_P (t) = 1;
13047 TREE_NOTHROW (t) = 1;
13048 attr_string = ", pure";
13050 else if ((classify & RS6000_BTC_FP) != 0)
13052 /* Function is a math function. If rounding mode is on, then treat the
13053 function as not reading global memory, but it can have arbitrary side
13054 effects. If it is off, then assume the function is a const function.
13055 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13056 builtin-attribute.def that is used for the math functions. */
13057 TREE_NOTHROW (t) = 1;
13058 if (flag_rounding_math)
13060 DECL_PURE_P (t) = 1;
13061 DECL_IS_NOVOPS (t) = 1;
13062 attr_string = ", fp, pure";
13066 TREE_READONLY (t) = 1;
13067 attr_string = ", fp, const";
13070 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13071 gcc_unreachable ();
/* Optional trace of each registered builtin and its attributes.  */
13073 if (TARGET_DEBUG_BUILTIN)
13074 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13075 (int)code, name, attr_string);
/* Builtin description tables.  Each section below redefines the
   RS6000_BUILTIN_* stamping macros so that re-including
   rs6000-builtin.def expands exactly one builtin class into a
   builtin_description initializer list (the other eight macros expand
   to nothing).  NOTE(review): line-sampled extract -- array braces and
   the closing "};" lines are missing from view.  */
13078 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13080 #undef RS6000_BUILTIN_0
13081 #undef RS6000_BUILTIN_1
13082 #undef RS6000_BUILTIN_2
13083 #undef RS6000_BUILTIN_3
13084 #undef RS6000_BUILTIN_A
13085 #undef RS6000_BUILTIN_D
13086 #undef RS6000_BUILTIN_H
13087 #undef RS6000_BUILTIN_P
13088 #undef RS6000_BUILTIN_X
13090 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13091 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13092 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13093 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13094 { MASK, ICODE, NAME, ENUM },
13096 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13097 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13098 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13099 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13100 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13102 static const struct builtin_description bdesc_3arg[] =
13104 #include "rs6000-builtin.def"
13107 /* DST operations: void foo (void *, const int, const char). */
13109 #undef RS6000_BUILTIN_0
13110 #undef RS6000_BUILTIN_1
13111 #undef RS6000_BUILTIN_2
13112 #undef RS6000_BUILTIN_3
13113 #undef RS6000_BUILTIN_A
13114 #undef RS6000_BUILTIN_D
13115 #undef RS6000_BUILTIN_H
13116 #undef RS6000_BUILTIN_P
13117 #undef RS6000_BUILTIN_X
13119 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13120 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13121 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13122 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13123 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13124 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13125 { MASK, ICODE, NAME, ENUM },
13127 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13128 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13129 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13131 static const struct builtin_description bdesc_dst[] =
13133 #include "rs6000-builtin.def"
13136 /* Simple binary operations: VECc = foo (VECa, VECb). */
13138 #undef RS6000_BUILTIN_0
13139 #undef RS6000_BUILTIN_1
13140 #undef RS6000_BUILTIN_2
13141 #undef RS6000_BUILTIN_3
13142 #undef RS6000_BUILTIN_A
13143 #undef RS6000_BUILTIN_D
13144 #undef RS6000_BUILTIN_H
13145 #undef RS6000_BUILTIN_P
13146 #undef RS6000_BUILTIN_X
13148 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13149 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13150 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13151 { MASK, ICODE, NAME, ENUM },
13153 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13154 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13155 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13156 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13157 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13158 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13160 static const struct builtin_description bdesc_2arg[] =
13162 #include "rs6000-builtin.def"
/* AltiVec predicate builtins (the 'P' class).  */
13165 #undef RS6000_BUILTIN_0
13166 #undef RS6000_BUILTIN_1
13167 #undef RS6000_BUILTIN_2
13168 #undef RS6000_BUILTIN_3
13169 #undef RS6000_BUILTIN_A
13170 #undef RS6000_BUILTIN_D
13171 #undef RS6000_BUILTIN_H
13172 #undef RS6000_BUILTIN_P
13173 #undef RS6000_BUILTIN_X
13175 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13176 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13177 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13178 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13179 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13180 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13181 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13182 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13183 { MASK, ICODE, NAME, ENUM },
13185 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13187 /* AltiVec predicates. */
13189 static const struct builtin_description bdesc_altivec_preds[] =
13191 #include "rs6000-builtin.def"
13194 /* ABS* operations. */
13196 #undef RS6000_BUILTIN_0
13197 #undef RS6000_BUILTIN_1
13198 #undef RS6000_BUILTIN_2
13199 #undef RS6000_BUILTIN_3
13200 #undef RS6000_BUILTIN_A
13201 #undef RS6000_BUILTIN_D
13202 #undef RS6000_BUILTIN_H
13203 #undef RS6000_BUILTIN_P
13204 #undef RS6000_BUILTIN_X
13206 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13207 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13208 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13209 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13210 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13211 { MASK, ICODE, NAME, ENUM },
13213 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13214 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13215 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13216 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13218 static const struct builtin_description bdesc_abs[] =
13220 #include "rs6000-builtin.def"
13223 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13226 #undef RS6000_BUILTIN_0
13227 #undef RS6000_BUILTIN_1
13228 #undef RS6000_BUILTIN_2
13229 #undef RS6000_BUILTIN_3
13230 #undef RS6000_BUILTIN_A
13231 #undef RS6000_BUILTIN_D
13232 #undef RS6000_BUILTIN_H
13233 #undef RS6000_BUILTIN_P
13234 #undef RS6000_BUILTIN_X
13236 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13237 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13238 { MASK, ICODE, NAME, ENUM },
13240 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13241 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13242 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13243 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13244 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13245 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13246 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13248 static const struct builtin_description bdesc_1arg[] =
13250 #include "rs6000-builtin.def"
13253 /* Simple no-argument operations: result = __builtin_darn_32 () */
13255 #undef RS6000_BUILTIN_0
13256 #undef RS6000_BUILTIN_1
13257 #undef RS6000_BUILTIN_2
13258 #undef RS6000_BUILTIN_3
13259 #undef RS6000_BUILTIN_A
13260 #undef RS6000_BUILTIN_D
13261 #undef RS6000_BUILTIN_H
13262 #undef RS6000_BUILTIN_P
13263 #undef RS6000_BUILTIN_X
13265 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13266 { MASK, ICODE, NAME, ENUM },
13268 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13269 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13270 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13271 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13272 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13273 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13274 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13275 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13277 static const struct builtin_description bdesc_0arg[] =
13279 #include "rs6000-builtin.def"
13282 /* HTM builtins. */
13283 #undef RS6000_BUILTIN_0
13284 #undef RS6000_BUILTIN_1
13285 #undef RS6000_BUILTIN_2
13286 #undef RS6000_BUILTIN_3
13287 #undef RS6000_BUILTIN_A
13288 #undef RS6000_BUILTIN_D
13289 #undef RS6000_BUILTIN_H
13290 #undef RS6000_BUILTIN_P
13291 #undef RS6000_BUILTIN_X
13293 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13294 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13295 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13296 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13297 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13298 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13299 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13300 { MASK, ICODE, NAME, ENUM },
13302 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13303 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13305 static const struct builtin_description bdesc_htm[] =
13307 #include "rs6000-builtin.def"
/* Leave the stamping macros undefined after the last table.  */
13310 #undef RS6000_BUILTIN_0
13311 #undef RS6000_BUILTIN_1
13312 #undef RS6000_BUILTIN_2
13313 #undef RS6000_BUILTIN_3
13314 #undef RS6000_BUILTIN_A
13315 #undef RS6000_BUILTIN_D
13316 #undef RS6000_BUILTIN_H
13317 #undef RS6000_BUILTIN_P
13319 /* Return true if a builtin function is overloaded. */
13321 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
/* Test the RS6000_BTC_OVERLOADED attribute bit in the info table.  */
13323 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
/* Return the builtin's source-level name from the info table.  */
13327 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13329 return rs6000_builtin_info[(int)fncode].name;
13332 /* Expand an expression EXP that calls a builtin without arguments. */
13334 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13337 machine_mode tmode = insn_data[icode].operand[0].mode;
13339 if (icode == CODE_FOR_nothing)
13340 /* Builtin not supported on this processor. */
/* mffsl reads the FPSCR, which does not exist under -msoft-float.  */
13343 if (icode == CODE_FOR_rs6000_mffsl
13344 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13346 error ("__builtin_mffsl() not supported with -msoft-float");
/* Allocate a fresh result register if TARGET is missing or unusable.  */
13351 || GET_MODE (target) != tmode
13352 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13353 target = gen_reg_rtx (tmode);
13355 pat = GEN_FCN (icode) (target);
/* Expand __builtin_mtfsf: operand 0 is a constant 8-bit field mask,
   operand 1 the value to move into the FPSCR fields.  */
13365 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13368 tree arg0 = CALL_EXPR_ARG (exp, 0);
13369 tree arg1 = CALL_EXPR_ARG (exp, 1);
13370 rtx op0 = expand_normal (arg0);
13371 rtx op1 = expand_normal (arg1);
13372 machine_mode mode0 = insn_data[icode].operand[0].mode;
13373 machine_mode mode1 = insn_data[icode].operand[1].mode;
13375 if (icode == CODE_FOR_nothing)
13376 /* Builtin not supported on this processor. */
13379 /* If we got invalid arguments bail out before generating bad rtl. */
13380 if (arg0 == error_mark_node || arg1 == error_mark_node)
/* The field mask must be a compile-time constant in [0, 255].  */
13383 if (!CONST_INT_P (op0)
13384 || INTVAL (op0) > 255
13385 || INTVAL (op0) < 0)
13387 error ("argument 1 must be an 8-bit field value");
13391 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13392 op0 = copy_to_mode_reg (mode0, op0);
13394 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13395 op1 = copy_to_mode_reg (mode1, op1);
13397 pat = GEN_FCN (icode) (op0, op1);
/* Expand __builtin_mtfsb0/__builtin_mtfsb1: set or clear one FPSCR bit.
   The single operand must be a constant bit number in [0, 31].  */
13406 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
13409 tree arg0 = CALL_EXPR_ARG (exp, 0);
13410 rtx op0 = expand_normal (arg0);
13412 if (icode == CODE_FOR_nothing)
13413 /* Builtin not supported on this processor. */
13416 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13418 error ("__builtin_mtfsb0 and __builtin_mtfsb1 not supported with -msoft-float");
13422 /* If we got invalid arguments bail out before generating bad rtl. */
13423 if (arg0 == error_mark_node)
13426 /* Only allow bit numbers 0 to 31. */
13427 if (!u5bit_cint_operand (op0, VOIDmode))
/* NOTE(review): GCC diagnostic convention is lowercase start and no
   trailing period; this message deviates.  */
13429 error ("Argument must be a constant between 0 and 31.");
13433 pat = GEN_FCN (icode) (op0);
/* Expand __builtin_set_fpscr_rn: set the FPSCR rounding-mode field from
   the low two bits of the operand.  */
13442 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
13445 tree arg0 = CALL_EXPR_ARG (exp, 0);
13446 rtx op0 = expand_normal (arg0);
13447 machine_mode mode0 = insn_data[icode].operand[0].mode;
13449 if (icode == CODE_FOR_nothing)
13450 /* Builtin not supported on this processor. */
13453 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13455 error ("__builtin_set_fpscr_rn not supported with -msoft-float");
13459 /* If we got invalid arguments bail out before generating bad rtl. */
13460 if (arg0 == error_mark_node)
13463 /* If the argument is a constant, check the range. Argument can only be a
13464 2-bit value. Unfortunately, can't check the range of the value at
13465 compile time if the argument is a variable. The least significant two
13466 bits of the argument, regardless of type, are used to set the rounding
13467 mode. All other bits are ignored. */
13468 if (CONST_INT_P (op0) && !const_0_to_3_operand(op0, VOIDmode))
13470 error ("Argument must be a value between 0 and 3.");
13474 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13475 op0 = copy_to_mode_reg (mode0, op0);
13477 pat = GEN_FCN (icode) (op0);
/* Expand __builtin_set_fpscr_drn: set the FPSCR decimal rounding-mode
   field from the low three bits of the operand.  64-bit only.  */
13485 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13488 tree arg0 = CALL_EXPR_ARG (exp, 0);
13489 rtx op0 = expand_normal (arg0);
13490 machine_mode mode0 = insn_data[icode].operand[0].mode;
13493 /* Builtin not supported in 32-bit mode. */
13494 fatal_error (input_location,
13495 "__builtin_set_fpscr_drn is not supported in 32-bit mode.");
13497 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13499 error ("__builtin_set_fpscr_drn not supported with -msoft-float");
13503 if (icode == CODE_FOR_nothing)
13504 /* Builtin not supported on this processor. */
13507 /* If we got invalid arguments bail out before generating bad rtl. */
13508 if (arg0 == error_mark_node)
13511 /* If the argument is a constant, check the range. Argument can only be a
13512 3-bit value. Unfortunately, can't check the range of the value at
13513 compile time if the argument is a variable. The least significant three
13514 bits of the argument, regardless of type, are used to set the rounding
13515 mode. All other bits are ignored. */
13516 if (CONST_INT_P (op0) && !const_0_to_7_operand(op0, VOIDmode))
13518 error ("Argument must be a value between 0 and 7.");
13522 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13523 op0 = copy_to_mode_reg (mode0, op0);
13525 pat = GEN_FCN (icode) (op0);
/* Expand a one-operand builtin; the vspltis[bhw] splat-immediate insns
   additionally require a 5-bit signed literal operand.  */
13534 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13537 tree arg0 = CALL_EXPR_ARG (exp, 0);
13538 rtx op0 = expand_normal (arg0);
13539 machine_mode tmode = insn_data[icode].operand[0].mode;
13540 machine_mode mode0 = insn_data[icode].operand[1].mode;
13542 if (icode == CODE_FOR_nothing)
13543 /* Builtin not supported on this processor. */
13546 /* If we got invalid arguments bail out before generating bad rtl. */
13547 if (arg0 == error_mark_node)
13550 if (icode == CODE_FOR_altivec_vspltisb
13551 || icode == CODE_FOR_altivec_vspltish
13552 || icode == CODE_FOR_altivec_vspltisw)
13554 /* Only allow 5-bit *signed* literals. */
13555 if (!CONST_INT_P (op0)
13556 || INTVAL (op0) > 15
13557 || INTVAL (op0) < -16)
/* Out-of-range literal: diagnose and return a harmless zero.  */
13559 error ("argument 1 must be a 5-bit signed literal");
13560 return CONST0_RTX (tmode);
13565 || GET_MODE (target) != tmode
13566 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13567 target = gen_reg_rtx (tmode);
13569 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13570 op0 = copy_to_mode_reg (mode0, op0);
13572 pat = GEN_FCN (icode) (target, op0);
/* Expand an AltiVec ABS-class builtin.  The insn pattern takes the
   input plus two scratch registers of the operand mode.  */
13581 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13583 rtx pat, scratch1, scratch2;
13584 tree arg0 = CALL_EXPR_ARG (exp, 0);
13585 rtx op0 = expand_normal (arg0);
13586 machine_mode tmode = insn_data[icode].operand[0].mode;
13587 machine_mode mode0 = insn_data[icode].operand[1].mode;
13589 /* If we have invalid arguments, bail out before generating bad rtl. */
13590 if (arg0 == error_mark_node)
13594 || GET_MODE (target) != tmode
13595 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13596 target = gen_reg_rtx (tmode);
13598 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13599 op0 = copy_to_mode_reg (mode0, op0);
13601 scratch1 = gen_reg_rtx (mode0);
13602 scratch2 = gen_reg_rtx (mode0);
13604 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
/* Expand a two-operand builtin described by ICODE.  For instructions whose
   second (or first) operand must be an immediate literal of a specific
   width, validate the constant here and emit a diagnostic instead of
   generating unmatchable RTL.
   NOTE(review): lines are elided in this chunk (returns, braces); confirm
   the full control flow against upstream rs6000.c.  */
13613 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13616   tree arg0 = CALL_EXPR_ARG (exp, 0);
13617   tree arg1 = CALL_EXPR_ARG (exp, 1);
13618   rtx op0 = expand_normal (arg0);
13619   rtx op1 = expand_normal (arg1);
13620   machine_mode tmode = insn_data[icode].operand[0].mode;
13621   machine_mode mode0 = insn_data[icode].operand[1].mode;
13622   machine_mode mode1 = insn_data[icode].operand[2].mode;
13624   if (icode == CODE_FOR_nothing)
13625     /* Builtin not supported on this processor.  */
13628   /* If we got invalid arguments bail out before generating bad rtl.  */
13629   if (arg0 == error_mark_node || arg1 == error_mark_node)
/* Per-icode immediate-range checks follow.  Each arm rejects a non-constant
   or out-of-range literal with an error and returns a zero constant.  */
13632   if (icode == CODE_FOR_unpackv1ti
13633       || icode == CODE_FOR_unpackkf
13634       || icode == CODE_FOR_unpacktf
13635       || icode == CODE_FOR_unpackif
13636       || icode == CODE_FOR_unpacktd)
13638       /* Only allow 1-bit unsigned literals.  */
13640       if (TREE_CODE (arg1) != INTEGER_CST
13641 	  || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13643 	  error ("argument 2 must be a 1-bit unsigned literal");
13644 	  return CONST0_RTX (tmode);
13647   else if (icode == CODE_FOR_altivec_vspltw)
13649       /* Only allow 2-bit unsigned literals.  */
13651       if (TREE_CODE (arg1) != INTEGER_CST
13652 	  || TREE_INT_CST_LOW (arg1) & ~3)
13654 	  error ("argument 2 must be a 2-bit unsigned literal");
13655 	  return CONST0_RTX (tmode);
13658   else if (icode == CODE_FOR_altivec_vsplth)
13660       /* Only allow 3-bit unsigned literals.  */
13662       if (TREE_CODE (arg1) != INTEGER_CST
13663 	  || TREE_INT_CST_LOW (arg1) & ~7)
13665 	  error ("argument 2 must be a 3-bit unsigned literal");
13666 	  return CONST0_RTX (tmode);
13669   else if (icode == CODE_FOR_altivec_vspltb)
13671       /* Only allow 4-bit unsigned literals.  */
13673       if (TREE_CODE (arg1) != INTEGER_CST
13674 	  || TREE_INT_CST_LOW (arg1) & ~15)
13676 	  error ("argument 2 must be a 4-bit unsigned literal");
13677 	  return CONST0_RTX (tmode);
13680   else if (icode == CODE_FOR_altivec_vcfux
13681       || icode == CODE_FOR_altivec_vcfsx
13682       || icode == CODE_FOR_altivec_vctsxs
13683       || icode == CODE_FOR_altivec_vctuxs)
13685       /* Only allow 5-bit unsigned literals.  */
13687       if (TREE_CODE (arg1) != INTEGER_CST
13688 	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
13690 	  error ("argument 2 must be a 5-bit unsigned literal");
13691 	  return CONST0_RTX (tmode);
13694   else if (icode == CODE_FOR_dfptstsfi_eq_dd
13695       || icode == CODE_FOR_dfptstsfi_lt_dd
13696       || icode == CODE_FOR_dfptstsfi_gt_dd
13697       || icode == CODE_FOR_dfptstsfi_unordered_dd
13698       || icode == CODE_FOR_dfptstsfi_eq_td
13699       || icode == CODE_FOR_dfptstsfi_lt_td
13700       || icode == CODE_FOR_dfptstsfi_gt_td
13701       || icode == CODE_FOR_dfptstsfi_unordered_td)
13703       /* Only allow 6-bit unsigned literals.  */
/* Note: for the DFP test-significance insns the literal is argument 1
   (arg0), unlike the other arms which validate arg1.  */
13705       if (TREE_CODE (arg0) != INTEGER_CST
13706 	  || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13708 	  error ("argument 1 must be a 6-bit unsigned literal");
13709 	  return CONST0_RTX (tmode);
13712   else if (icode == CODE_FOR_xststdcqp_kf
13713 	   || icode == CODE_FOR_xststdcqp_tf
13714 	   || icode == CODE_FOR_xststdcdp
13715 	   || icode == CODE_FOR_xststdcsp
13716 	   || icode == CODE_FOR_xvtstdcdp
13717 	   || icode == CODE_FOR_xvtstdcsp)
13719       /* Only allow 7-bit unsigned literals.  */
13721       if (TREE_CODE (arg1) != INTEGER_CST
13722 	  || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13724 	  error ("argument 2 must be a 7-bit unsigned literal");
13725 	  return CONST0_RTX (tmode);
/* Allocate or reuse the result register, force operands to satisfy the
   insn predicates, then emit the pattern.  */
13730       || GET_MODE (target) != tmode
13731       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13732     target = gen_reg_rtx (tmode);
13734   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13735     op0 = copy_to_mode_reg (mode0, op0);
13736   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13737     op1 = copy_to_mode_reg (mode1, op1);
13739   pat = GEN_FCN (icode) (target, op0, op1);
/* Expand an AltiVec predicate builtin (__builtin_altivec_predicate).  The
   first argument selects which CR6 bit combination to test; the compare
   itself writes CR6 as a side effect and we extract the requested bit into
   TARGET (an SImode value).
   NOTE(review): lines elided in this chunk (returns, braces, switch case
   labels); confirm against upstream rs6000.c.  */
13748 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13751   tree cr6_form = CALL_EXPR_ARG (exp, 0);
13752   tree arg0 = CALL_EXPR_ARG (exp, 1);
13753   tree arg1 = CALL_EXPR_ARG (exp, 2);
13754   rtx op0 = expand_normal (arg0);
13755   rtx op1 = expand_normal (arg1);
13756   machine_mode tmode = SImode;
13757   machine_mode mode0 = insn_data[icode].operand[1].mode;
13758   machine_mode mode1 = insn_data[icode].operand[2].mode;
/* The CR6 selector must be a compile-time constant.  */
13761   if (TREE_CODE (cr6_form) != INTEGER_CST)
13763       error ("argument 1 of %qs must be a constant",
13764 	     "__builtin_altivec_predicate");
13768     cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13770   gcc_assert (mode0 == mode1);
13772   /* If we have invalid arguments, bail out before generating bad rtl.  */
13773   if (arg0 == error_mark_node || arg1 == error_mark_node)
13777       || GET_MODE (target) != tmode
13778       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13779     target = gen_reg_rtx (tmode);
13781   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13782     op0 = copy_to_mode_reg (mode0, op0);
13783   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13784     op1 = copy_to_mode_reg (mode1, op1);
13786   /* Note that for many of the relevant operations (e.g. cmpne or
13787      cmpeq) with float or double operands, it makes more sense for the
13788      mode of the allocated scratch register to select a vector of
13789      integer.  But the choice to copy the mode of operand 0 was made
13790      long ago and there are no plans to change it.  */
13791   scratch = gen_reg_rtx (mode0);
13793   pat = GEN_FCN (icode) (scratch, op0, op1);
13798   /* The vec_any* and vec_all* predicates use the same opcodes for two
13799      different operations, but the bits in CR6 will be different
13800      depending on what information we want.  So we have to play tricks
13801      with CR6 to get the right bits out.
13803      If you think this is disgusting, look at the specs for the
13804      AltiVec predicates.  */
13806   switch (cr6_form_int)
/* Case labels 0..3 are elided here; each arm emits one CR6 test insn.  */
13809       emit_insn (gen_cr6_test_for_zero (target));
13812       emit_insn (gen_cr6_test_for_zero_reverse (target));
13815       emit_insn (gen_cr6_test_for_lt (target));
13818       emit_insn (gen_cr6_test_for_lt_reverse (target));
13821       error ("argument 1 of %qs is out of range",
13822 	     "__builtin_altivec_predicate");
/* Return (as a V16QImode register) a vperm permute selector that byte-swaps
   each element of a vector of MODE.  swapN reverses bytes within N-byte
   elements.  NOTE(review): the switch that picks swaparray from MODE is
   elided in this chunk (only the gcc_unreachable default is visible) —
   confirm against upstream rs6000.c.  */
13830 swap_endian_selector_for_mode (machine_mode mode)
13832   unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13833   unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13834   unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13835   unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13837   unsigned int *swaparray, i;
13857       gcc_unreachable ();
/* Materialize the 16 selector bytes as a CONST_VECTOR in a register.  */
13860   for (i = 0; i < 16; ++i)
13861     perm[i] = GEN_INT (swaparray[i]);
13863   return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13864 						     gen_rtvec_v (16, perm)));
/* Expand an AltiVec load builtin: arg0 is a byte offset, arg1 a base
   pointer.  BLK requests a BLKmode memory reference.  For the plain LVX
   variants the effective address is ANDed with -16 to model the hardware's
   16-byte alignment truncation explicitly in RTL.
   NOTE(review): lines elided (braces, returns, else keywords); confirm
   against upstream rs6000.c.  */
13868 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13871   tree arg0 = CALL_EXPR_ARG (exp, 0);
13872   tree arg1 = CALL_EXPR_ARG (exp, 1);
13873   machine_mode tmode = insn_data[icode].operand[0].mode;
13874   machine_mode mode0 = Pmode;
13875   machine_mode mode1 = Pmode;
13876   rtx op0 = expand_normal (arg0);
13877   rtx op1 = expand_normal (arg1);
13879   if (icode == CODE_FOR_nothing)
13880     /* Builtin not supported on this processor.  */
13883   /* If we got invalid arguments bail out before generating bad rtl.  */
13884   if (arg0 == error_mark_node || arg1 == error_mark_node)
13888       || GET_MODE (target) != tmode
13889       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13890     target = gen_reg_rtx (tmode);
13892   op1 = copy_to_mode_reg (mode1, op1);
13894   /* For LVX, express the RTL accurately by ANDing the address with -16.
13895      LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13896      so the raw address is fine.  */
13897   if (icode == CODE_FOR_altivec_lvx_v1ti
13898       || icode == CODE_FOR_altivec_lvx_v2df
13899       || icode == CODE_FOR_altivec_lvx_v2di
13900       || icode == CODE_FOR_altivec_lvx_v4sf
13901       || icode == CODE_FOR_altivec_lvx_v4si
13902       || icode == CODE_FOR_altivec_lvx_v8hi
13903       || icode == CODE_FOR_altivec_lvx_v16qi)
/* A zero offset lets us use the base pointer directly; otherwise form
   base + offset before the alignment mask.  */
13906       if (op0 == const0_rtx)
13910 	  op0 = copy_to_mode_reg (mode0, op0);
13911 	  rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13913       addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13914       addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13916       emit_insn (gen_rtx_SET (target, addr));
/* Non-LVX path: use the raw (unmasked) address.  */
13920       if (op0 == const0_rtx)
13921 	addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13924 	  op0 = copy_to_mode_reg (mode0, op0);
13925 	  addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13926 			      gen_rtx_PLUS (Pmode, op1, op0));
13929       pat = GEN_FCN (icode) (target, addr);
/* Expand a store-with-length builtin (stxvl / xst_len_r): three operands
   (vector value, address, length), no result.  Each operand is forced into
   a register if it fails the insn's predicate.
   NOTE(review): the final emit/return lines are elided in this chunk.  */
13939 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13942   tree arg0 = CALL_EXPR_ARG (exp, 0);
13943   tree arg1 = CALL_EXPR_ARG (exp, 1);
13944   tree arg2 = CALL_EXPR_ARG (exp, 2);
13945   rtx op0 = expand_normal (arg0);
13946   rtx op1 = expand_normal (arg1);
13947   rtx op2 = expand_normal (arg2);
13948   machine_mode mode0 = insn_data[icode].operand[0].mode;
13949   machine_mode mode1 = insn_data[icode].operand[1].mode;
13950   machine_mode mode2 = insn_data[icode].operand[2].mode;
13952   if (icode == CODE_FOR_nothing)
13953     /* Builtin not supported on this processor.  */
13956   /* If we got invalid arguments bail out before generating bad rtl.  */
13957   if (arg0 == error_mark_node
13958       || arg1 == error_mark_node
13959       || arg2 == error_mark_node)
/* Note the predicate indices are shifted by one relative to the ops:
   operand[1..3] predicates check op0..op2 of this store pattern.  */
13962   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13963     op0 = copy_to_mode_reg (mode0, op0);
13964   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13965     op1 = copy_to_mode_reg (mode1, op1);
13966   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13967     op2 = copy_to_mode_reg (mode2, op2);
13969   pat = GEN_FCN (icode) (op0, op1, op2);
/* Expand an AltiVec store builtin: arg0 is the value, arg1 a byte offset,
   arg2 the base pointer.  Mirrors altivec_expand_lv_builtin: plain STVX
   variants model the hardware's 16-byte address truncation with an
   explicit AND of -16; the other variants use the raw address.
   NOTE(review): lines elided (braces, returns, else keywords); confirm
   against upstream rs6000.c.  */
13977 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13979   tree arg0 = CALL_EXPR_ARG (exp, 0);
13980   tree arg1 = CALL_EXPR_ARG (exp, 1);
13981   tree arg2 = CALL_EXPR_ARG (exp, 2);
13982   rtx op0 = expand_normal (arg0);
13983   rtx op1 = expand_normal (arg1);
13984   rtx op2 = expand_normal (arg2);
13985   rtx pat, addr, rawaddr;
13986   machine_mode tmode = insn_data[icode].operand[0].mode;
13987   machine_mode smode = insn_data[icode].operand[1].mode;
13988   machine_mode mode1 = Pmode;
13989   machine_mode mode2 = Pmode;
13991   /* Invalid arguments.  Bail before doing anything stoopid!  */
13992   if (arg0 == error_mark_node
13993       || arg1 == error_mark_node
13994       || arg2 == error_mark_node)
13997   op2 = copy_to_mode_reg (mode2, op2);
13999   /* For STVX, express the RTL accurately by ANDing the address with -16.
14000      STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14001      so the raw address is fine.  */
14002   if (icode == CODE_FOR_altivec_stvx_v2df
14003       || icode == CODE_FOR_altivec_stvx_v2di
14004       || icode == CODE_FOR_altivec_stvx_v4sf
14005       || icode == CODE_FOR_altivec_stvx_v4si
14006       || icode == CODE_FOR_altivec_stvx_v8hi
14007       || icode == CODE_FOR_altivec_stvx_v16qi)
/* Zero offset: store through the base pointer; otherwise base + offset.  */
14009       if (op1 == const0_rtx)
14013 	  op1 = copy_to_mode_reg (mode1, op1);
14014 	  rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14017       addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14018       addr = gen_rtx_MEM (tmode, addr);
14020       op0 = copy_to_mode_reg (tmode, op0);
/* Emit the store as a plain SET so the alignment AND stays visible.  */
14022       emit_insn (gen_rtx_SET (addr, op0));
/* Non-STVX path: force the value to satisfy the insn predicate and use
   the raw address.  */
14026       if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14027 	op0 = copy_to_mode_reg (smode, op0);
14029       if (op1 == const0_rtx)
14030 	addr = gen_rtx_MEM (tmode, op2);
14033 	  op1 = copy_to_mode_reg (mode1, op1);
14034 	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14037       pat = GEN_FCN (icode) (addr, op0);
14045 /* Return the appropriate SPR number associated with the given builtin.  */
14046 static inline HOST_WIDE_INT
14047 htm_spr_num (enum rs6000_builtins code)
/* Maps each HTM get/set builtin pair to its SPR number.
   NOTE(review): the return statements for the TFHAR/TFIAR/TEXASR arms are
   elided in this chunk (line numbers jump); only the TEXASRU return is
   visible — confirm against upstream rs6000.c.  */
14049   if (code == HTM_BUILTIN_GET_TFHAR
14050       || code == HTM_BUILTIN_SET_TFHAR)
14052   else if (code == HTM_BUILTIN_GET_TFIAR
14053 	   || code == HTM_BUILTIN_SET_TFIAR)
14055   else if (code == HTM_BUILTIN_GET_TEXASR
14056 	   || code == HTM_BUILTIN_SET_TEXASR)
/* Any remaining code must be a TEXASRU accessor.  */
14058   gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14059 	      || code == HTM_BUILTIN_SET_TEXASRU);
14060   return TEXASRU_SPR;
14063 /* Return the appropriate SPR regno associated with the given builtin.  */
14064 static inline HOST_WIDE_INT
14065 htm_spr_regno (enum rs6000_builtins code)
/* Note TEXASR and TEXASRU share one register number (TEXASR_REGNO),
   unlike htm_spr_num where they have distinct SPR numbers.  */
14067   if (code == HTM_BUILTIN_GET_TFHAR
14068       || code == HTM_BUILTIN_SET_TFHAR)
14069     return TFHAR_REGNO;
14070   else if (code == HTM_BUILTIN_GET_TFIAR
14071 	   || code == HTM_BUILTIN_SET_TFIAR)
14072     return TFIAR_REGNO;
14073   gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14074 	      || code == HTM_BUILTIN_SET_TEXASR
14075 	      || code == HTM_BUILTIN_GET_TEXASRU
14076 	      || code == HTM_BUILTIN_SET_TEXASRU);
14077   return TEXASR_REGNO;
14080 /* Return the correct ICODE value depending on whether we are
14081    setting or reading the HTM SPRs.  */
14082 static inline enum insn_code
14083 rs6000_htm_spr_icode (bool nonvoid)
/* NONVOID (builtin returns a value) selects mfspr; otherwise mtspr.
   The DI/SI choice follows TARGET_POWERPC64.
   NOTE(review): the `if (nonvoid)`/`else` lines are elided here.  */
14086     return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14088     return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14091 /* Expand the HTM builtin in EXP and store the result in TARGET.
14092    Store true in *EXPANDEDP if we found a builtin to expand.  */
14094 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
/* Table-driven expander for the hardware-transactional-memory builtins.
   Collects operands into op[], appends implicit SPR/CR operands as the
   builtin's attributes dictate, then dispatches on operand count.
   NOTE(review): many lines are elided in this chunk (braces, returns,
   switch case labels, loop bodies); confirm against upstream rs6000.c.  */
14096   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14097   bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14098   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14099   const struct builtin_description *d;
/* The 64-bit-only doubleword abort builtins are rejected in 32-bit mode.  */
14104   if (!TARGET_POWERPC64
14105       && (fcode == HTM_BUILTIN_TABORTDC
14106 	  || fcode == HTM_BUILTIN_TABORTDCI))
14108       size_t uns_fcode = (size_t)fcode;
14109       const char *name = rs6000_builtin_info[uns_fcode].name;
14110       error ("builtin %qs is only valid in 64-bit mode", name);
14114   /* Expand the HTM builtins.  */
14116   for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14117     if (d->code == fcode)
14119 	rtx op[MAX_HTM_OPERANDS], pat;
14122 	call_expr_arg_iterator iter;
14123 	unsigned attr = rs6000_builtin_info[fcode].attr;
14124 	enum insn_code icode = d->icode;
14125 	const struct insn_operand_data *insn_op;
14126 	bool uses_spr = (attr & RS6000_BTC_SPR);
/* SPR accessors use the generic mfspr/mtspr icode instead of d->icode.  */
14130 	  icode = rs6000_htm_spr_icode (nonvoid);
14131 	insn_op = &insn_data[icode].operand[0];
/* For value-returning builtins, op[0] is the result register.  */
14135 	    machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
14137 		|| GET_MODE (target) != tmode
14138 		|| (uses_spr && !(*insn_op->predicate) (target, tmode)))
14139 	      target = gen_reg_rtx (tmode);
14141 	      op[nopnds++] = target;
/* Collect the explicit call arguments, validating "n" (literal-only)
   constraints with a user-facing diagnostic rather than an ICE.  */
14144 	FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14146 	    if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14149 	    insn_op = &insn_data[icode].operand[nopnds];
14151 	    op[nopnds] = expand_normal (arg);
14153 	    if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14155 		if (!strcmp (insn_op->constraint, "n"))
14157 		    int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14158 		    if (!CONST_INT_P (op[nopnds]))
14159 		      error ("argument %d must be an unsigned literal", arg_num);
14161 		      error ("argument %d is an unsigned literal that is "
14162 			     "out of range", arg_num);
14165 		op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14171 	/* Handle the builtins for extended mnemonics.  These accept
14172 	   no arguments, but map to builtins that take arguments.  */
14175 	  case HTM_BUILTIN_TENDALL:  /* Alias for: tend. 1  */
14176 	  case HTM_BUILTIN_TRESUME:  /* Alias for: tsr. 1  */
14177 	    op[nopnds++] = GEN_INT (1);
14179 	    attr |= RS6000_BTC_UNARY;
14181 	  case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0  */
14182 	    op[nopnds++] = GEN_INT (0);
14184 	    attr |= RS6000_BTC_UNARY;
14190 	/* If this builtin accesses SPRs, then pass in the appropriate
14191 	   SPR number and SPR regno as the last two operands.  */
14194 	    machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14195 	    op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14196 	    op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14198 	/* If this builtin accesses a CR, then pass in a scratch
14199 	   CR as the last operand.  */
14200 	else if (attr & RS6000_BTC_CR)
14201 	  { cr = gen_reg_rtx (CCmode);
/* Consistency check (checking builds only): the collected operand count
   must match what the builtin's type attributes imply.  */
14207 	    int expected_nopnds = 0;
14208 	    if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14209 	      expected_nopnds = 1;
14210 	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14211 	      expected_nopnds = 2;
14212 	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14213 	      expected_nopnds = 3;
14214 	    if (!(attr & RS6000_BTC_VOID))
14215 	      expected_nopnds += 1;
14217 	      expected_nopnds += 2;
14219 	    gcc_assert (nopnds == expected_nopnds
14220 			&& nopnds <= MAX_HTM_OPERANDS);
/* Dispatch on operand count (switch labels elided in this chunk).  */
14226 	    pat = GEN_FCN (icode) (op[0]);
14229 	    pat = GEN_FCN (icode) (op[0], op[1]);
14232 	    pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14235 	    pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14238 	    gcc_unreachable ();
14244 	if (attr & RS6000_BTC_CR)
14246 	    if (fcode == HTM_BUILTIN_TBEGIN)
14248 		/* Emit code to set TARGET to true or false depending on
14249 		   whether the tbegin. instruction successfully or failed
14250 		   to start a transaction.  We do this by placing the 1's
14251 		   complement of CR's EQ bit into TARGET.  */
14252 		rtx scratch = gen_reg_rtx (SImode);
14253 		emit_insn (gen_rtx_SET (scratch,
14254 					gen_rtx_EQ (SImode, cr,
14256 		emit_insn (gen_rtx_SET (target,
14257 					gen_rtx_XOR (SImode, scratch,
14262 		/* Emit code to copy the 4-bit condition register field
14263 		   CR into the least significant end of register TARGET.  */
14264 		rtx scratch1 = gen_reg_rtx (SImode);
14265 		rtx scratch2 = gen_reg_rtx (SImode);
14266 		rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14267 		emit_insn (gen_movcc (subreg, cr));
14268 		emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14269 		emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
/* No matching entry in bdesc_htm: report not-expanded to the caller.  */
14278   *expandedp = false;
14282 /* Expand the CPU builtin in FCODE and store the result in TARGET.  */
14285 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
/* Expands __builtin_cpu_init/__builtin_cpu_is/__builtin_cpu_supports by
   reading the CPUID / HWCAP words glibc stores in the TCB (thread control
   block) at fixed offsets from the TLS register.
   NOTE(review): lines elided (parameter list continuation, returns,
   braces); confirm against upstream rs6000.c.  */
14288   /* __builtin_cpu_init () is a nop, so expand to nothing.  */
14289   if (fcode == RS6000_BUILTIN_CPU_INIT)
14292   if (target == 0 || GET_MODE (target) != SImode)
14293     target = gen_reg_rtx (SImode);
14295 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14296   tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14297   /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back
14298      to a STRING_CST.  */
14299   if (TREE_CODE (arg) == ARRAY_REF
14300       && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14301       && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14302       && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14303     arg = TREE_OPERAND (arg, 0);
14305   if (TREE_CODE (arg) != STRING_CST)
14307       error ("builtin %qs only accepts a string argument",
14308 	     rs6000_builtin_info[(size_t) fcode].name);
14312   if (fcode == RS6000_BUILTIN_CPU_IS)
14314       const char *cpu = TREE_STRING_POINTER (arg);
14315       rtx cpuid = NULL_RTX;
/* Look the CPU name up in the static cpu_is_info table.  */
14316       for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14317 	if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14319 	    /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM.  */
14320 	    cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14323       if (cpuid == NULL_RTX)
14325 	  /* Invalid CPU argument.  */
14326 	  error ("cpu %qs is an invalid argument to builtin %qs",
14327 		 cpu, rs6000_builtin_info[(size_t) fcode].name);
/* Compare the TCB's platform word against the requested CPUID.  */
14331       rtx platform = gen_reg_rtx (SImode);
14332       rtx tcbmem = gen_const_mem (SImode,
14333 				  gen_rtx_PLUS (Pmode,
14334 						gen_rtx_REG (Pmode, TLS_REGNUM),
14335 						GEN_INT (TCB_PLATFORM_OFFSET)));
14336       emit_move_insn (platform, tcbmem);
14337       emit_insn (gen_eqsi3 (target, platform, cpuid));
14339   else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14341       const char *hwcap = TREE_STRING_POINTER (arg);
14342       rtx mask = NULL_RTX;
14344       for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14345 	if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14347 	    mask = GEN_INT (cpu_supports_info[i].mask);
14348 	    hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14351       if (mask == NULL_RTX)
14353 	  /* Invalid HWCAP argument.  */
14354 	  error ("%s %qs is an invalid argument to builtin %qs",
14355 		 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
/* target = ((tcb_hwcap & mask) == 0) ^ 1, i.e. nonzero iff the HWCAP
   bit is set.  */
14359       rtx tcb_hwcap = gen_reg_rtx (SImode);
14360       rtx tcbmem = gen_const_mem (SImode,
14361 				  gen_rtx_PLUS (Pmode,
14362 						gen_rtx_REG (Pmode, TLS_REGNUM),
14363 						GEN_INT (hwcap_offset)));
14364       emit_move_insn (tcb_hwcap, tcbmem);
14365       rtx scratch1 = gen_reg_rtx (SImode);
14366       emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14367       rtx scratch2 = gen_reg_rtx (SImode);
14368       emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14369       emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14372     gcc_unreachable ();
14374   /* Record that we have expanded a CPU builtin, so that we can later
14375      emit a reference to the special symbol exported by LIBC to ensure we
14376      do not link against an old LIBC that doesn't support this feature.  */
14377   cpu_builtin_p = true;
/* Without TCB support (old libc), warn and always return false.  */
14380   warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
14381 	   "capability bits", rs6000_builtin_info[(size_t) fcode].name);
14383   /* For old LIBCs, always return FALSE.  */
14384   emit_move_insn (target, GEN_INT (0));
14385 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
/* Expand a three-operand builtin described by ICODE.  As with the binop
   expander, instructions that require immediate literal operands are
   validated here with user-facing diagnostics; note that which argument is
   checked (arg0, arg1 or arg2) varies per instruction.
   NOTE(review): lines elided in this chunk (returns, braces); confirm
   against upstream rs6000.c.  */
14391 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14394   tree arg0 = CALL_EXPR_ARG (exp, 0);
14395   tree arg1 = CALL_EXPR_ARG (exp, 1);
14396   tree arg2 = CALL_EXPR_ARG (exp, 2);
14397   rtx op0 = expand_normal (arg0);
14398   rtx op1 = expand_normal (arg1);
14399   rtx op2 = expand_normal (arg2);
14400   machine_mode tmode = insn_data[icode].operand[0].mode;
14401   machine_mode mode0 = insn_data[icode].operand[1].mode;
14402   machine_mode mode1 = insn_data[icode].operand[2].mode;
14403   machine_mode mode2 = insn_data[icode].operand[3].mode;
14405   if (icode == CODE_FOR_nothing)
14406     /* Builtin not supported on this processor.  */
14409   /* If we got invalid arguments bail out before generating bad rtl.  */
14410   if (arg0 == error_mark_node
14411       || arg1 == error_mark_node
14412       || arg2 == error_mark_node)
14415   /* Check and prepare argument depending on the instruction code.
14417      Note that a switch statement instead of the sequence of tests
14418      would be incorrect as many of the CODE_FOR values could be
14419      CODE_FOR_nothing and that would yield multiple alternatives
14420      with identical values.  We'd never reach here at runtime in
14422   if (icode == CODE_FOR_altivec_vsldoi_v4sf
14423       || icode == CODE_FOR_altivec_vsldoi_v2df
14424       || icode == CODE_FOR_altivec_vsldoi_v4si
14425       || icode == CODE_FOR_altivec_vsldoi_v8hi
14426       || icode == CODE_FOR_altivec_vsldoi_v16qi)
14428       /* Only allow 4-bit unsigned literals.  */
14430       if (TREE_CODE (arg2) != INTEGER_CST
14431 	  || TREE_INT_CST_LOW (arg2) & ~0xf)
14433 	  error ("argument 3 must be a 4-bit unsigned literal");
14434 	  return CONST0_RTX (tmode);
14437   else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14438 	   || icode == CODE_FOR_vsx_xxpermdi_v2di
14439 	   || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14440 	   || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14441 	   || icode == CODE_FOR_vsx_xxpermdi_v1ti
14442 	   || icode == CODE_FOR_vsx_xxpermdi_v4sf
14443 	   || icode == CODE_FOR_vsx_xxpermdi_v4si
14444 	   || icode == CODE_FOR_vsx_xxpermdi_v8hi
14445 	   || icode == CODE_FOR_vsx_xxpermdi_v16qi
14446 	   || icode == CODE_FOR_vsx_xxsldwi_v16qi
14447 	   || icode == CODE_FOR_vsx_xxsldwi_v8hi
14448 	   || icode == CODE_FOR_vsx_xxsldwi_v4si
14449 	   || icode == CODE_FOR_vsx_xxsldwi_v4sf
14450 	   || icode == CODE_FOR_vsx_xxsldwi_v2di
14451 	   || icode == CODE_FOR_vsx_xxsldwi_v2df)
14453       /* Only allow 2-bit unsigned literals.  */
14455       if (TREE_CODE (arg2) != INTEGER_CST
14456 	  || TREE_INT_CST_LOW (arg2) & ~0x3)
14458 	  error ("argument 3 must be a 2-bit unsigned literal");
14459 	  return CONST0_RTX (tmode);
14462   else if (icode == CODE_FOR_vsx_set_v2df
14463 	   || icode == CODE_FOR_vsx_set_v2di
14464 	   || icode == CODE_FOR_bcdadd
14465 	   || icode == CODE_FOR_bcdadd_lt
14466 	   || icode == CODE_FOR_bcdadd_eq
14467 	   || icode == CODE_FOR_bcdadd_gt
14468 	   || icode == CODE_FOR_bcdsub
14469 	   || icode == CODE_FOR_bcdsub_lt
14470 	   || icode == CODE_FOR_bcdsub_eq
14471 	   || icode == CODE_FOR_bcdsub_gt)
14473       /* Only allow 1-bit unsigned literals.  */
14475       if (TREE_CODE (arg2) != INTEGER_CST
14476 	  || TREE_INT_CST_LOW (arg2) & ~0x1)
14478 	  error ("argument 3 must be a 1-bit unsigned literal");
14479 	  return CONST0_RTX (tmode);
14482   else if (icode == CODE_FOR_dfp_ddedpd_dd
14483 	   || icode == CODE_FOR_dfp_ddedpd_td)
14485       /* Only allow 2-bit unsigned literals where the value is 0 or 2.  */
/* NOTE(review): this arm checks TREE_CODE of arg0 but masks arg2 —
   looks inconsistent with its own error message about argument 1;
   compare with upstream rs6000.c before relying on it.  */
14487       if (TREE_CODE (arg0) != INTEGER_CST
14488 	  || TREE_INT_CST_LOW (arg2) & ~0x3)
14490 	  error ("argument 1 must be 0 or 2");
14491 	  return CONST0_RTX (tmode);
14494   else if (icode == CODE_FOR_dfp_denbcd_dd
14495 	   || icode == CODE_FOR_dfp_denbcd_td)
14497       /* Only allow 1-bit unsigned literals.  */
14499       if (TREE_CODE (arg0) != INTEGER_CST
14500 	  || TREE_INT_CST_LOW (arg0) & ~0x1)
14502 	  error ("argument 1 must be a 1-bit unsigned literal");
14503 	  return CONST0_RTX (tmode);
14506   else if (icode == CODE_FOR_dfp_dscli_dd
14507 	   || icode == CODE_FOR_dfp_dscli_td
14508 	   || icode == CODE_FOR_dfp_dscri_dd
14509 	   || icode == CODE_FOR_dfp_dscri_td)
14511       /* Only allow 6-bit unsigned literals.  */
14513       if (TREE_CODE (arg1) != INTEGER_CST
14514 	  || TREE_INT_CST_LOW (arg1) & ~0x3f)
14516 	  error ("argument 2 must be a 6-bit unsigned literal");
14517 	  return CONST0_RTX (tmode);
14520   else if (icode == CODE_FOR_crypto_vshasigmaw
14521 	   || icode == CODE_FOR_crypto_vshasigmad)
14523       /* Check whether the 2nd and 3rd arguments are integer constants and in
14524 	 range and prepare arguments.  */
14526       if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14528 	  error ("argument 2 must be 0 or 1");
14529 	  return CONST0_RTX (tmode);
14533       if (TREE_CODE (arg2) != INTEGER_CST
14534 	  || wi::geu_p (wi::to_wide (arg2), 16))
14536 	  error ("argument 3 must be in the range 0..15");
14537 	  return CONST0_RTX (tmode);
/* Allocate/reuse the result register, legitimize operands, emit.  */
14542       || GET_MODE (target) != tmode
14543       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14544     target = gen_reg_rtx (tmode);
14546   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14547     op0 = copy_to_mode_reg (mode0, op0);
14548   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14549     op1 = copy_to_mode_reg (mode1, op1);
14550   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14551     op2 = copy_to_mode_reg (mode2, op2);
14553   pat = GEN_FCN (icode) (target, op0, op1, op2);
14562 /* Expand the dst builtins.  */
14564 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
/* Expand the AltiVec data-stream-touch (dst*) builtins by looking the
   function code up in bdesc_dst.  The third argument is the 2-bit stream
   tag and must be a literal.  Sets *EXPANDEDP when a match is found.
   NOTE(review): lines elided (parameter continuation, returns, braces);
   confirm against upstream rs6000.c.  */
14567   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14568   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14569   tree arg0, arg1, arg2;
14570   machine_mode mode0, mode1;
14571   rtx pat, op0, op1, op2;
14572   const struct builtin_description *d;
14575   *expandedp = false;
14577   /* Handle DST variants.  */
14579   for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14580     if (d->code == fcode)
14582 	arg0 = CALL_EXPR_ARG (exp, 0);
14583 	arg1 = CALL_EXPR_ARG (exp, 1);
14584 	arg2 = CALL_EXPR_ARG (exp, 2);
14585 	op0 = expand_normal (arg0);
14586 	op1 = expand_normal (arg1);
14587 	op2 = expand_normal (arg2);
14588 	mode0 = insn_data[d->icode].operand[0].mode;
14589 	mode1 = insn_data[d->icode].operand[1].mode;
14591 	/* Invalid arguments, bail out before generating bad rtl.  */
14592 	if (arg0 == error_mark_node
14593 	    || arg1 == error_mark_node
14594 	    || arg2 == error_mark_node)
/* The stream selector (arg2) must be a 2-bit literal.  */
14599 	if (TREE_CODE (arg2) != INTEGER_CST
14600 	    || TREE_INT_CST_LOW (arg2) & ~0x3)
14602 	    error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14606 	if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14607 	  op0 = copy_to_mode_reg (Pmode, op0);
14608 	if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14609 	  op1 = copy_to_mode_reg (mode1, op1);
14611 	pat = GEN_FCN (d->icode) (op0, op1, op2);
14621 /* Expand vec_init builtin.  */
14623 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
/* Build a vector of TYPE from the call's N_ELT scalar arguments, either
   directly (single-element vectors like V1TI) or via
   rs6000_expand_vector_init on a PARALLEL of the elements.  */
14625   machine_mode tmode = TYPE_MODE (type);
14626   machine_mode inner_mode = GET_MODE_INNER (tmode);
14627   int i, n_elt = GET_MODE_NUNITS (tmode);
14629   gcc_assert (VECTOR_MODE_P (tmode));
14630   gcc_assert (n_elt == call_expr_nargs (exp));
14632   if (!target || !register_operand (target, tmode))
14633     target = gen_reg_rtx (tmode);
14635   /* If we have a vector compromised of a single element, such as V1TImode, do
14636      the initialization directly.  */
14637   if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14639       rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14640       emit_move_insn (target, gen_lowpart (tmode, x));
/* General case: expand every element and hand the vector off to the
   target init routine (the else branch and return are elided here).  */
14644       rtvec v = rtvec_alloc (n_elt);
14646       for (i = 0; i < n_elt; ++i)
14648 	  rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14649 	  RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14652       rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14658 /* Return the integer constant in ARG.  Constrain it to be in the range
14659    of the subparts of VEC_TYPE; issue an error if not.  */
14662 get_element_number (tree vec_type, tree arg)
14664   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constants and out-of-range selectors with a diagnostic.
   (The returns in both paths are elided in this chunk.)  */
14666   if (!tree_fits_uhwi_p (arg)
14667       || (elt = tree_to_uhwi (arg), elt > max))
14669       error ("selector must be an integer constant in the range 0..%wi", max);
14676 /* Expand vec_set builtin.  */
14678 altivec_expand_vec_set_builtin (tree exp)
/* Expand vec_insert: arg0 is the vector, arg1 the new element value,
   arg2 the (validated constant) element index.  Returns the updated
   vector in op0 (return elided in this chunk).  */
14680   machine_mode tmode, mode1;
14681   tree arg0, arg1, arg2;
14685   arg0 = CALL_EXPR_ARG (exp, 0);
14686   arg1 = CALL_EXPR_ARG (exp, 1);
14687   arg2 = CALL_EXPR_ARG (exp, 2);
14689   tmode = TYPE_MODE (TREE_TYPE (arg0));
14690   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14691   gcc_assert (VECTOR_MODE_P (tmode));
14693   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14694   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14695   elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the element value to the vector's inner mode if needed.  */
14697   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14698     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14700   op0 = force_reg (tmode, op0);
14701   op1 = force_reg (mode1, op1);
14703   rs6000_expand_vector_set (op0, op1, elt);
14708 /* Expand vec_ext builtin.  */
14710 altivec_expand_vec_ext_builtin (tree exp, rtx target)
/* Expand vec_extract: arg0 is the vector, arg1 the element index (may be
   variable; constants are range-checked).  Result lands in TARGET
   (return elided in this chunk).  */
14712   machine_mode tmode, mode0;
14717   arg0 = CALL_EXPR_ARG (exp, 0);
14718   arg1 = CALL_EXPR_ARG (exp, 1);
14720   op0 = expand_normal (arg0);
14721   op1 = expand_normal (arg1);
14723   /* Call get_element_number to validate arg1 if it is a constant.  */
14724   if (TREE_CODE (arg1) == INTEGER_CST)
14725     (void) get_element_number (TREE_TYPE (arg0), arg1);
14727   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14728   mode0 = TYPE_MODE (TREE_TYPE (arg0));
14729   gcc_assert (VECTOR_MODE_P (mode0));
14731   op0 = force_reg (mode0, op0);
/* Under optimization always use a fresh pseudo for the result.  */
14733   if (optimize || !target || !register_operand (target, tmode))
14734     target = gen_reg_rtx (tmode);
14736   rs6000_expand_vector_extract (target, op0, op1);
14741 /* Expand the builtin in EXP and store the result in TARGET. Store
14742 true in *EXPANDEDP if we found a builtin to expand. */
/* NOTE(review): the embedded original line numbers in this extract are
   non-contiguous; structural lines (the switch header, braces, break
   statements and some emit/return lines) are not visible here.  The
   comments below describe only what the visible lines establish.  */
14744 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14746 const struct builtin_description *d;
14748 enum insn_code icode;
14749 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14750 tree arg0, arg1, arg2;
14752 machine_mode tmode, mode0;
14753 enum rs6000_builtins fcode
14754 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
/* Overloaded builtins should have been resolved during parsing; reaching
   here with one still unresolved is a user error, so diagnose and fall
   back to an ordinary call.  */
14756 if (rs6000_overloaded_builtin_p (fcode))
14759 error ("unresolved overload for Altivec builtin %qF", fndecl);
14761 /* Given it is invalid, just generate a normal call. */
14762 return expand_call (exp, target, false);
/* Data-stream-touch (dst*) builtins are handled by their own expander,
   which also sets *EXPANDEDP when it recognizes the code.  */
14765 target = altivec_expand_dst_builtin (exp, target, expandedp);
/* Vector store builtins: each maps one builtin code to the matching
   store insn pattern and delegates to altivec_expand_stv_builtin.  */
14773 case ALTIVEC_BUILTIN_STVX_V2DF:
14774 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14775 case ALTIVEC_BUILTIN_STVX_V2DI:
14776 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14777 case ALTIVEC_BUILTIN_STVX_V4SF:
14778 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14779 case ALTIVEC_BUILTIN_STVX:
14780 case ALTIVEC_BUILTIN_STVX_V4SI:
14781 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14782 case ALTIVEC_BUILTIN_STVX_V8HI:
14783 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14784 case ALTIVEC_BUILTIN_STVX_V16QI:
14785 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14786 case ALTIVEC_BUILTIN_STVEBX:
14787 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14788 case ALTIVEC_BUILTIN_STVEHX:
14789 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14790 case ALTIVEC_BUILTIN_STVEWX:
14791 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14792 case ALTIVEC_BUILTIN_STVXL_V2DF:
14793 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14794 case ALTIVEC_BUILTIN_STVXL_V2DI:
14795 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14796 case ALTIVEC_BUILTIN_STVXL_V4SF:
14797 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14798 case ALTIVEC_BUILTIN_STVXL:
14799 case ALTIVEC_BUILTIN_STVXL_V4SI:
14800 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14801 case ALTIVEC_BUILTIN_STVXL_V8HI:
14802 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14803 case ALTIVEC_BUILTIN_STVXL_V16QI:
14804 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14806 case ALTIVEC_BUILTIN_STVLX:
14807 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14808 case ALTIVEC_BUILTIN_STVLXL:
14809 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14810 case ALTIVEC_BUILTIN_STVRX:
14811 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14812 case ALTIVEC_BUILTIN_STVRXL:
14813 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
/* Variable-length stores (ISA 3.0) use a dedicated expander that also
   takes a length operand.  */
14815 case P9V_BUILTIN_STXVL:
14816 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14818 case P9V_BUILTIN_XST_LEN_R:
14819 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14821 case VSX_BUILTIN_STXVD2X_V1TI:
14822 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14823 case VSX_BUILTIN_STXVD2X_V2DF:
14824 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14825 case VSX_BUILTIN_STXVD2X_V2DI:
14826 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14827 case VSX_BUILTIN_STXVW4X_V4SF:
14828 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14829 case VSX_BUILTIN_STXVW4X_V4SI:
14830 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14831 case VSX_BUILTIN_STXVW4X_V8HI:
14832 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14833 case VSX_BUILTIN_STXVW4X_V16QI:
14834 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14836 /* For the following on big endian, it's ok to use any appropriate
14837 unaligned-supporting store, so use a generic expander. For
14838 little-endian, the exact element-reversing instruction must
14840 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14842 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14843 : CODE_FOR_vsx_st_elemrev_v1ti);
14844 return altivec_expand_stv_builtin (code, exp);
14846 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14848 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14849 : CODE_FOR_vsx_st_elemrev_v2df);
14850 return altivec_expand_stv_builtin (code, exp);
14852 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14854 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14855 : CODE_FOR_vsx_st_elemrev_v2di);
14856 return altivec_expand_stv_builtin (code, exp);
14858 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14860 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14861 : CODE_FOR_vsx_st_elemrev_v4sf);
14862 return altivec_expand_stv_builtin (code, exp);
14864 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14866 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14867 : CODE_FOR_vsx_st_elemrev_v4si);
14868 return altivec_expand_stv_builtin (code, exp);
14870 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14872 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14873 : CODE_FOR_vsx_st_elemrev_v8hi);
14874 return altivec_expand_stv_builtin (code, exp);
14876 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14878 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14879 : CODE_FOR_vsx_st_elemrev_v16qi);
14880 return altivec_expand_stv_builtin (code, exp);
/* Move-from-VSCR: the insn produces a value, so make sure TARGET is a
   fresh register acceptable to the insn's output predicate.  */
14883 case ALTIVEC_BUILTIN_MFVSCR:
14884 icode = CODE_FOR_altivec_mfvscr;
14885 tmode = insn_data[icode].operand[0].mode;
14888 || GET_MODE (target) != tmode
14889 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14890 target = gen_reg_rtx (tmode);
14892 pat = GEN_FCN (icode) (target);
/* Move-to-VSCR: single input operand, coerced into a register if the
   insn predicate rejects the expanded form.  */
14898 case ALTIVEC_BUILTIN_MTVSCR:
14899 icode = CODE_FOR_altivec_mtvscr;
14900 arg0 = CALL_EXPR_ARG (exp, 0);
14901 op0 = expand_normal (arg0);
14902 mode0 = insn_data[icode].operand[0].mode;
14904 /* If we got invalid arguments bail out before generating bad rtl. */
14905 if (arg0 == error_mark_node)
14908 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14909 op0 = copy_to_mode_reg (mode0, op0);
14911 pat = GEN_FCN (icode) (op0);
14916 case ALTIVEC_BUILTIN_DSSALL:
14917 emit_insn (gen_altivec_dssall ());
/* Data-stream-stop: the stream tag must be a literal 0..3, enforced
   here so we never emit RTL with an out-of-range immediate.  */
14920 case ALTIVEC_BUILTIN_DSS:
14921 icode = CODE_FOR_altivec_dss;
14922 arg0 = CALL_EXPR_ARG (exp, 0);
14924 op0 = expand_normal (arg0);
14925 mode0 = insn_data[icode].operand[0].mode;
14927 /* If we got invalid arguments bail out before generating bad rtl. */
14928 if (arg0 == error_mark_node)
14931 if (TREE_CODE (arg0) != INTEGER_CST
14932 || TREE_INT_CST_LOW (arg0) & ~0x3)
14934 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14938 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14939 op0 = copy_to_mode_reg (mode0, op0);
14941 emit_insn (gen_altivec_dss (op0));
/* vec_init / vec_set / vec_ext are expanded by generic helpers keyed on
   the call's type rather than per-mode insn codes.  */
14944 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14945 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14946 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14947 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14948 case VSX_BUILTIN_VEC_INIT_V2DF:
14949 case VSX_BUILTIN_VEC_INIT_V2DI:
14950 case VSX_BUILTIN_VEC_INIT_V1TI:
14951 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14953 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14954 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14955 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14956 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14957 case VSX_BUILTIN_VEC_SET_V2DF:
14958 case VSX_BUILTIN_VEC_SET_V2DI:
14959 case VSX_BUILTIN_VEC_SET_V1TI:
14960 return altivec_expand_vec_set_builtin (exp);
14962 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14963 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14964 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14965 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14966 case VSX_BUILTIN_VEC_EXT_V2DF:
14967 case VSX_BUILTIN_VEC_EXT_V2DI:
14968 case VSX_BUILTIN_VEC_EXT_V1TI:
14969 return altivec_expand_vec_ext_builtin (exp, target);
/* vextract4b / vinsert4b require their byte-offset operand to be a
   constant in 0..12; otherwise fall back to a library call.  */
14971 case P9V_BUILTIN_VEC_EXTRACT4B:
14972 arg1 = CALL_EXPR_ARG (exp, 1);
14975 /* Generate a normal call if it is invalid. */
14976 if (arg1 == error_mark_node)
14977 return expand_call (exp, target, false);
14979 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14981 error ("second argument to %qs must be 0..12", "vec_vextract4b");
14982 return expand_call (exp, target, false);
14986 case P9V_BUILTIN_VEC_INSERT4B:
14987 arg2 = CALL_EXPR_ARG (exp, 2);
14990 /* Generate a normal call if it is invalid. */
14991 if (arg2 == error_mark_node)
14992 return expand_call (exp, target, false);
14994 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14996 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
14997 return expand_call (exp, target, false);
15003 /* Fall through. */
15006 /* Expand abs* operations. */
15008 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15009 if (d->code == fcode)
15010 return altivec_expand_abs_builtin (d->icode, exp, target);
15012 /* Expand the AltiVec predicates. */
15013 d = bdesc_altivec_preds;
15014 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15015 if (d->code == fcode)
15016 return altivec_expand_predicate_builtin (d->icode, exp, target);
15018 /* LV* are funky. We initialized them differently. */
/* Vector load builtins: same one-case-per-insn shape as the stores
   above; the final bool argument selects the "blkmode" handling in
   altivec_expand_lv_builtin (true only for the lvlx/lvrx family).  */
15021 case ALTIVEC_BUILTIN_LVSL:
15022 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15023 exp, target, false);
15024 case ALTIVEC_BUILTIN_LVSR:
15025 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15026 exp, target, false);
15027 case ALTIVEC_BUILTIN_LVEBX:
15028 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15029 exp, target, false);
15030 case ALTIVEC_BUILTIN_LVEHX:
15031 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15032 exp, target, false);
15033 case ALTIVEC_BUILTIN_LVEWX:
15034 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15035 exp, target, false);
15036 case ALTIVEC_BUILTIN_LVXL_V2DF:
15037 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15038 exp, target, false);
15039 case ALTIVEC_BUILTIN_LVXL_V2DI:
15040 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15041 exp, target, false);
15042 case ALTIVEC_BUILTIN_LVXL_V4SF:
15043 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15044 exp, target, false);
15045 case ALTIVEC_BUILTIN_LVXL:
15046 case ALTIVEC_BUILTIN_LVXL_V4SI:
15047 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15048 exp, target, false);
15049 case ALTIVEC_BUILTIN_LVXL_V8HI:
15050 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15051 exp, target, false);
15052 case ALTIVEC_BUILTIN_LVXL_V16QI:
15053 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15054 exp, target, false);
15055 case ALTIVEC_BUILTIN_LVX_V1TI:
15056 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
15057 exp, target, false);
15058 case ALTIVEC_BUILTIN_LVX_V2DF:
15059 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
15060 exp, target, false);
15061 case ALTIVEC_BUILTIN_LVX_V2DI:
15062 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
15063 exp, target, false);
15064 case ALTIVEC_BUILTIN_LVX_V4SF:
15065 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
15066 exp, target, false);
15067 case ALTIVEC_BUILTIN_LVX:
15068 case ALTIVEC_BUILTIN_LVX_V4SI:
15069 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
15070 exp, target, false);
15071 case ALTIVEC_BUILTIN_LVX_V8HI:
15072 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
15073 exp, target, false);
15074 case ALTIVEC_BUILTIN_LVX_V16QI:
15075 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
15076 exp, target, false);
15077 case ALTIVEC_BUILTIN_LVLX:
15078 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15079 exp, target, true);
15080 case ALTIVEC_BUILTIN_LVLXL:
15081 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15082 exp, target, true);
15083 case ALTIVEC_BUILTIN_LVRX:
15084 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15085 exp, target, true);
15086 case ALTIVEC_BUILTIN_LVRXL:
15087 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15088 exp, target, true);
15089 case VSX_BUILTIN_LXVD2X_V1TI:
15090 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15091 exp, target, false);
15092 case VSX_BUILTIN_LXVD2X_V2DF:
15093 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15094 exp, target, false);
15095 case VSX_BUILTIN_LXVD2X_V2DI:
15096 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15097 exp, target, false);
15098 case VSX_BUILTIN_LXVW4X_V4SF:
15099 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15100 exp, target, false);
15101 case VSX_BUILTIN_LXVW4X_V4SI:
15102 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15103 exp, target, false);
15104 case VSX_BUILTIN_LXVW4X_V8HI:
15105 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15106 exp, target, false);
15107 case VSX_BUILTIN_LXVW4X_V16QI:
15108 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15109 exp, target, false);
15110 /* For the following on big endian, it's ok to use any appropriate
15111 unaligned-supporting load, so use a generic expander. For
15112 little-endian, the exact element-reversing instruction must
15114 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15116 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15117 : CODE_FOR_vsx_ld_elemrev_v2df);
15118 return altivec_expand_lv_builtin (code, exp, target, false);
15120 case VSX_BUILTIN_LD_ELEMREV_V1TI:
15122 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
15123 : CODE_FOR_vsx_ld_elemrev_v1ti);
15124 return altivec_expand_lv_builtin (code, exp, target, false);
15126 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15128 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15129 : CODE_FOR_vsx_ld_elemrev_v2di);
15130 return altivec_expand_lv_builtin (code, exp, target, false);
15132 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15134 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15135 : CODE_FOR_vsx_ld_elemrev_v4sf);
15136 return altivec_expand_lv_builtin (code, exp, target, false);
15138 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15140 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15141 : CODE_FOR_vsx_ld_elemrev_v4si);
15142 return altivec_expand_lv_builtin (code, exp, target, false);
15144 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15146 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15147 : CODE_FOR_vsx_ld_elemrev_v8hi);
15148 return altivec_expand_lv_builtin (code, exp, target, false);
15150 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15152 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15153 : CODE_FOR_vsx_ld_elemrev_v16qi);
15154 return altivec_expand_lv_builtin (code, exp, target, false);
15159 /* Fall through. */
/* No AltiVec builtin matched; report that nothing was expanded.  */
15162 *expandedp = false;
15166 /* Check whether a builtin function is supported in this target
/* A builtin is supported only when every target-option bit it requires
   (its mask in rs6000_builtin_info) is also set in the current
   rs6000_builtin_mask.  NOTE(review): the return statements are not
   visible in this extract.  */
15169 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
15171 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
15172 if ((fnmask & rs6000_builtin_mask) != fnmask)
15178 /* Raise an error message for a builtin function that is called without the
15179 appropriate target options being set. */
15182 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15184 size_t uns_fncode = (size_t) fncode;
15185 const char *name = rs6000_builtin_info[uns_fncode].name;
15186 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15188 gcc_assert (name != NULL);
/* The chain below tests combined mask requirements (e.g. DFP together
   with P8_VECTOR) before the individual bits, so the most specific
   diagnostic is emitted first.  */
15189 if ((fnmask & RS6000_BTM_CELL) != 0)
15190 error ("builtin function %qs is only valid for the cell processor", name);
15191 else if ((fnmask & RS6000_BTM_VSX) != 0)
15192 error ("builtin function %qs requires the %qs option", name, "-mvsx");
15193 else if ((fnmask & RS6000_BTM_HTM) != 0)
15194 error ("builtin function %qs requires the %qs option", name, "-mhtm");
15195 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15196 error ("builtin function %qs requires the %qs option", name, "-maltivec");
15197 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15198 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15199 error ("builtin function %qs requires the %qs and %qs options",
15200 name, "-mhard-dfp", "-mpower8-vector");
15201 else if ((fnmask & RS6000_BTM_DFP) != 0)
15202 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
15203 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15204 error ("builtin function %qs requires the %qs option", name,
15205 "-mpower8-vector");
15206 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15207 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15208 error ("builtin function %qs requires the %qs and %qs options",
15209 name, "-mcpu=power9", "-m64")
15210 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15211 error ("builtin function %qs requires the %qs option", name,
15213 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15214 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15215 error ("builtin function %qs requires the %qs and %qs options",
15216 name, "-mcpu=power9", "-m64");
15217 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15218 error ("builtin function %qs requires the %qs option", name,
15220 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
/* 128-bit long double builtins: distinguish soft-float targets from
   targets where long double is not the 128-bit IBM format.  */
15222 if (!TARGET_HARD_FLOAT)
15223 error ("builtin function %qs requires the %qs option", name,
15226 error ("builtin function %qs requires the %qs option", name,
15227 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
15229 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15230 error ("builtin function %qs requires the %qs option", name,
15232 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
15233 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
15235 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15236 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
15237 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15238 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15239 error ("builtin function %qs requires the %qs (or newer), and "
15240 "%qs or %qs options",
15241 name, "-mcpu=power7", "-m64", "-mpowerpc64");
/* Fallback diagnostic when no specific mask bit matched.  */
15243 error ("builtin function %qs is not supported with the current options",
15247 /* Target hook for early folding of built-ins, shamelessly stolen
/* Delegates to SUBTARGET_FOLD_BUILTIN when a subtarget defines it;
   NOTE(review): the #else/fallback return is not visible in this
   extract.  All parameters are unused in the generic case, hence
   ATTRIBUTE_UNUSED.  */
15251 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
15252 int n_args ATTRIBUTE_UNUSED,
15253 tree *args ATTRIBUTE_UNUSED,
15254 bool ignore ATTRIBUTE_UNUSED)
15256 #ifdef SUBTARGET_FOLD_BUILTIN
15257 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15263 /* Helper function to sort out which built-ins may be valid without having
/* Every case listed is a vector store built-in (STVX/STXV families),
   which produces no value and is therefore meaningful even when the
   call's result is unused.  NOTE(review): the switch header and return
   statements are not visible in this extract.  */
15266 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
15270 case ALTIVEC_BUILTIN_STVX_V16QI:
15271 case ALTIVEC_BUILTIN_STVX_V8HI:
15272 case ALTIVEC_BUILTIN_STVX_V4SI:
15273 case ALTIVEC_BUILTIN_STVX_V4SF:
15274 case ALTIVEC_BUILTIN_STVX_V2DI:
15275 case ALTIVEC_BUILTIN_STVX_V2DF:
15276 case VSX_BUILTIN_STXVW4X_V16QI:
15277 case VSX_BUILTIN_STXVW4X_V8HI:
15278 case VSX_BUILTIN_STXVW4X_V4SF:
15279 case VSX_BUILTIN_STXVW4X_V4SI:
15280 case VSX_BUILTIN_STXVD2X_V2DF:
15281 case VSX_BUILTIN_STXVD2X_V2DI:
15288 /* Helper function to handle the gimple folding of a vector compare
15289 operation. This sets up true/false vectors, and uses the
15290 VEC_COND_EXPR operation.
15291 CODE indicates which comparison is to be made. (EQ, GT, ...).
15292 TYPE indicates the type of the result. */
15294 fold_build_vec_cmp (tree_code code, tree type,
15295 tree arg0, tree arg1)
/* Compare in a same-sized boolean vector type, then select -1 (all
   bits set) for true lanes and 0 for false lanes, matching the AltiVec
   predicate-result convention.  */
15297 tree cmp_type = build_same_sized_truth_vector_type (type);
15298 tree zero_vec = build_zero_cst (type);
15299 tree minus_one_vec = build_minus_one_cst (type);
15300 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
15301 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
15304 /* Helper function to handle the in-between steps for the
15305 vector compare built-ins. */
15307 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
/* Build the -1/0 comparison result typed like the call's LHS and
   replace the builtin call statement with a plain assignment.  */
15309 tree arg0 = gimple_call_arg (stmt, 0);
15310 tree arg1 = gimple_call_arg (stmt, 1);
15311 tree lhs = gimple_call_lhs (stmt);
15312 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
15313 gimple *g = gimple_build_assign (lhs, cmp);
15314 gimple_set_location (g, gimple_location (stmt));
15315 gsi_replace (gsi, g, true);
15318 /* Helper function to map V2DF and V4SF types to their
15319 integral equivalents (V2DI and V4SI). */
15320 tree map_to_integral_tree_type (tree input_tree_type)
/* Integral vector types are returned unchanged; float vector types are
   mapped to the integer vector type with the same lane layout.  Any
   other element type is a caller bug (gcc_unreachable).  */
15322 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
15323 return input_tree_type;
15326 if (types_compatible_p (TREE_TYPE (input_tree_type),
15327 TREE_TYPE (V2DF_type_node)))
15328 return V2DI_type_node;
15329 else if (types_compatible_p (TREE_TYPE (input_tree_type),
15330 TREE_TYPE (V4SF_type_node)))
15331 return V4SI_type_node;
15333 gcc_unreachable ();
15337 /* Helper function to handle the vector merge[hl] built-ins. The
15338 implementation difference between h and l versions for this code are in
15339 the values used when building of the permute vector for high word versus
15340 low word merge. The variance is keyed off the use_high parameter. */
15342 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
15344 tree arg0 = gimple_call_arg (stmt, 0);
15345 tree arg1 = gimple_call_arg (stmt, 1);
15346 tree lhs = gimple_call_lhs (stmt);
15347 tree lhs_type = TREE_TYPE (lhs);
15348 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15349 int midpoint = n_elts / 2;
15355 /* The permute_type will match the lhs for integral types. For double and
15356 float types, the permute type needs to map to the V2 or V4 type that
15359 permute_type = map_to_integral_tree_type (lhs_type);
15360 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
/* Build the selector: for each output pair, take one element from arg0
   and the corresponding element from arg1 (indices offset by n_elts).
   NOTE(review): the lines computing `offset` from use_high are not
   visible in this extract.  */
15362 for (int i = 0; i < midpoint; i++)
15364 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15366 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15367 offset + n_elts + i));
15370 tree permute = elts.build ();
/* Replace the builtin call with an equivalent VEC_PERM_EXPR so later
   passes can optimize it like any other shuffle.  */
15372 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15373 gimple_set_location (g, gimple_location (stmt));
15374 gsi_replace (gsi, g, true);
15377 /* Helper function to handle the vector merge[eo] built-ins. */
/* USE_ODD is 0 for merge-even (lanes 0,2,4,...) and 1 for merge-odd
   (lanes 1,3,5,...); it biases every selector index below.  */
15379 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
15381 tree arg0 = gimple_call_arg (stmt, 0);
15382 tree arg1 = gimple_call_arg (stmt, 1);
15383 tree lhs = gimple_call_lhs (stmt);
15384 tree lhs_type = TREE_TYPE (lhs);
15385 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15387 /* The permute_type will match the lhs for integral types. For double and
15388 float types, the permute type needs to map to the V2 or V4 type that
15391 permute_type = map_to_integral_tree_type (lhs_type);
15393 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
15395 /* Build the permute vector. */
/* Interleave even (or odd) lanes of arg0 with the matching lanes of
   arg1; arg1's lanes are addressed at index + n_elts in the selector.  */
15396 for (int i = 0; i < n_elts / 2; i++)
15398 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15400 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
15401 2*i + use_odd + n_elts));
15404 tree permute = elts.build ();
/* Replace the builtin call with an equivalent VEC_PERM_EXPR.  */
15406 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15407 gimple_set_location (g, gimple_location (stmt));
15408 gsi_replace (gsi, g, true);
15411 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15412 a constant, use rs6000_fold_builtin.) */
15415 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15417 gimple *stmt = gsi_stmt (*gsi);
15418 tree fndecl = gimple_call_fndecl (stmt);
15419 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15420 enum rs6000_builtins fn_code
15421 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15422 tree arg0, arg1, lhs, temp;
15423 enum tree_code bcode;
15426 size_t uns_fncode = (size_t) fn_code;
15427 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15428 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15429 const char *fn_name2 = (icode != CODE_FOR_nothing)
15430 ? get_insn_name ((int) icode)
15433 if (TARGET_DEBUG_BUILTIN)
15434 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15435 fn_code, fn_name1, fn_name2);
15437 if (!rs6000_fold_gimple)
15440 /* Prevent gimple folding for code that does not have a LHS, unless it is
15441 allowed per the rs6000_builtin_valid_without_lhs helper function. */
15442 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15445 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15446 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15447 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15453 /* Flavors of vec_add. We deliberately don't expand
15454 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15455 TImode, resulting in much poorer code generation. */
15456 case ALTIVEC_BUILTIN_VADDUBM:
15457 case ALTIVEC_BUILTIN_VADDUHM:
15458 case ALTIVEC_BUILTIN_VADDUWM:
15459 case P8V_BUILTIN_VADDUDM:
15460 case ALTIVEC_BUILTIN_VADDFP:
15461 case VSX_BUILTIN_XVADDDP:
15464 arg0 = gimple_call_arg (stmt, 0);
15465 arg1 = gimple_call_arg (stmt, 1);
15466 lhs = gimple_call_lhs (stmt);
15467 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15468 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15470 /* Ensure the binary operation is performed in a type
15471 that wraps if it is integral type. */
15472 gimple_seq stmts = NULL;
15473 tree type = unsigned_type_for (TREE_TYPE (lhs));
15474 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15476 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15478 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15479 type, uarg0, uarg1);
15480 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15481 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15482 build1 (VIEW_CONVERT_EXPR,
15483 TREE_TYPE (lhs), res));
15484 gsi_replace (gsi, g, true);
15487 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15488 gimple_set_location (g, gimple_location (stmt));
15489 gsi_replace (gsi, g, true);
15491 /* Flavors of vec_sub. We deliberately don't expand
15492 P8V_BUILTIN_VSUBUQM. */
15493 case ALTIVEC_BUILTIN_VSUBUBM:
15494 case ALTIVEC_BUILTIN_VSUBUHM:
15495 case ALTIVEC_BUILTIN_VSUBUWM:
15496 case P8V_BUILTIN_VSUBUDM:
15497 case ALTIVEC_BUILTIN_VSUBFP:
15498 case VSX_BUILTIN_XVSUBDP:
15499 bcode = MINUS_EXPR;
15501 case VSX_BUILTIN_XVMULSP:
15502 case VSX_BUILTIN_XVMULDP:
15503 arg0 = gimple_call_arg (stmt, 0);
15504 arg1 = gimple_call_arg (stmt, 1);
15505 lhs = gimple_call_lhs (stmt);
15506 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15507 gimple_set_location (g, gimple_location (stmt));
15508 gsi_replace (gsi, g, true);
15510 /* Even element flavors of vec_mul (signed). */
15511 case ALTIVEC_BUILTIN_VMULESB:
15512 case ALTIVEC_BUILTIN_VMULESH:
15513 case P8V_BUILTIN_VMULESW:
15514 /* Even element flavors of vec_mul (unsigned). */
15515 case ALTIVEC_BUILTIN_VMULEUB:
15516 case ALTIVEC_BUILTIN_VMULEUH:
15517 case P8V_BUILTIN_VMULEUW:
15518 arg0 = gimple_call_arg (stmt, 0);
15519 arg1 = gimple_call_arg (stmt, 1);
15520 lhs = gimple_call_lhs (stmt);
15521 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15522 gimple_set_location (g, gimple_location (stmt));
15523 gsi_replace (gsi, g, true);
15525 /* Odd element flavors of vec_mul (signed). */
15526 case ALTIVEC_BUILTIN_VMULOSB:
15527 case ALTIVEC_BUILTIN_VMULOSH:
15528 case P8V_BUILTIN_VMULOSW:
15529 /* Odd element flavors of vec_mul (unsigned). */
15530 case ALTIVEC_BUILTIN_VMULOUB:
15531 case ALTIVEC_BUILTIN_VMULOUH:
15532 case P8V_BUILTIN_VMULOUW:
15533 arg0 = gimple_call_arg (stmt, 0);
15534 arg1 = gimple_call_arg (stmt, 1);
15535 lhs = gimple_call_lhs (stmt);
15536 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15537 gimple_set_location (g, gimple_location (stmt));
15538 gsi_replace (gsi, g, true);
15540 /* Flavors of vec_div (Integer). */
15541 case VSX_BUILTIN_DIV_V2DI:
15542 case VSX_BUILTIN_UDIV_V2DI:
15543 arg0 = gimple_call_arg (stmt, 0);
15544 arg1 = gimple_call_arg (stmt, 1);
15545 lhs = gimple_call_lhs (stmt);
15546 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15547 gimple_set_location (g, gimple_location (stmt));
15548 gsi_replace (gsi, g, true);
15550 /* Flavors of vec_div (Float). */
15551 case VSX_BUILTIN_XVDIVSP:
15552 case VSX_BUILTIN_XVDIVDP:
15553 arg0 = gimple_call_arg (stmt, 0);
15554 arg1 = gimple_call_arg (stmt, 1);
15555 lhs = gimple_call_lhs (stmt);
15556 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15557 gimple_set_location (g, gimple_location (stmt));
15558 gsi_replace (gsi, g, true);
15560 /* Flavors of vec_and. */
15561 case ALTIVEC_BUILTIN_VAND:
15562 arg0 = gimple_call_arg (stmt, 0);
15563 arg1 = gimple_call_arg (stmt, 1);
15564 lhs = gimple_call_lhs (stmt);
15565 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15566 gimple_set_location (g, gimple_location (stmt));
15567 gsi_replace (gsi, g, true);
15569 /* Flavors of vec_andc. */
15570 case ALTIVEC_BUILTIN_VANDC:
15571 arg0 = gimple_call_arg (stmt, 0);
15572 arg1 = gimple_call_arg (stmt, 1);
15573 lhs = gimple_call_lhs (stmt);
15574 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15575 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15576 gimple_set_location (g, gimple_location (stmt));
15577 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15578 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15579 gimple_set_location (g, gimple_location (stmt));
15580 gsi_replace (gsi, g, true);
15582 /* Flavors of vec_nand. */
15583 case P8V_BUILTIN_VEC_NAND:
15584 case P8V_BUILTIN_NAND_V16QI:
15585 case P8V_BUILTIN_NAND_V8HI:
15586 case P8V_BUILTIN_NAND_V4SI:
15587 case P8V_BUILTIN_NAND_V4SF:
15588 case P8V_BUILTIN_NAND_V2DF:
15589 case P8V_BUILTIN_NAND_V2DI:
15590 arg0 = gimple_call_arg (stmt, 0);
15591 arg1 = gimple_call_arg (stmt, 1);
15592 lhs = gimple_call_lhs (stmt);
15593 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15594 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15595 gimple_set_location (g, gimple_location (stmt));
15596 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15597 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15598 gimple_set_location (g, gimple_location (stmt));
15599 gsi_replace (gsi, g, true);
15601 /* Flavors of vec_or. */
15602 case ALTIVEC_BUILTIN_VOR:
15603 arg0 = gimple_call_arg (stmt, 0);
15604 arg1 = gimple_call_arg (stmt, 1);
15605 lhs = gimple_call_lhs (stmt);
15606 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15607 gimple_set_location (g, gimple_location (stmt));
15608 gsi_replace (gsi, g, true);
15610 /* flavors of vec_orc. */
15611 case P8V_BUILTIN_ORC_V16QI:
15612 case P8V_BUILTIN_ORC_V8HI:
15613 case P8V_BUILTIN_ORC_V4SI:
15614 case P8V_BUILTIN_ORC_V4SF:
15615 case P8V_BUILTIN_ORC_V2DF:
15616 case P8V_BUILTIN_ORC_V2DI:
15617 arg0 = gimple_call_arg (stmt, 0);
15618 arg1 = gimple_call_arg (stmt, 1);
15619 lhs = gimple_call_lhs (stmt);
15620 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15621 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15622 gimple_set_location (g, gimple_location (stmt));
15623 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15624 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15625 gimple_set_location (g, gimple_location (stmt));
15626 gsi_replace (gsi, g, true);
15628 /* Flavors of vec_xor. */
15629 case ALTIVEC_BUILTIN_VXOR:
15630 arg0 = gimple_call_arg (stmt, 0);
15631 arg1 = gimple_call_arg (stmt, 1);
15632 lhs = gimple_call_lhs (stmt);
15633 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15634 gimple_set_location (g, gimple_location (stmt));
15635 gsi_replace (gsi, g, true);
15637 /* Flavors of vec_nor. */
15638 case ALTIVEC_BUILTIN_VNOR:
15639 arg0 = gimple_call_arg (stmt, 0);
15640 arg1 = gimple_call_arg (stmt, 1);
15641 lhs = gimple_call_lhs (stmt);
15642 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15643 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15644 gimple_set_location (g, gimple_location (stmt));
15645 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15646 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15647 gimple_set_location (g, gimple_location (stmt));
15648 gsi_replace (gsi, g, true);
15650 /* flavors of vec_abs. */
15651 case ALTIVEC_BUILTIN_ABS_V16QI:
15652 case ALTIVEC_BUILTIN_ABS_V8HI:
15653 case ALTIVEC_BUILTIN_ABS_V4SI:
15654 case ALTIVEC_BUILTIN_ABS_V4SF:
15655 case P8V_BUILTIN_ABS_V2DI:
15656 case VSX_BUILTIN_XVABSDP:
15657 arg0 = gimple_call_arg (stmt, 0);
15658 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15659 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15661 lhs = gimple_call_lhs (stmt);
15662 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15663 gimple_set_location (g, gimple_location (stmt));
15664 gsi_replace (gsi, g, true);
15666 /* flavors of vec_min. */
15667 case VSX_BUILTIN_XVMINDP:
15668 case P8V_BUILTIN_VMINSD:
15669 case P8V_BUILTIN_VMINUD:
15670 case ALTIVEC_BUILTIN_VMINSB:
15671 case ALTIVEC_BUILTIN_VMINSH:
15672 case ALTIVEC_BUILTIN_VMINSW:
15673 case ALTIVEC_BUILTIN_VMINUB:
15674 case ALTIVEC_BUILTIN_VMINUH:
15675 case ALTIVEC_BUILTIN_VMINUW:
15676 case ALTIVEC_BUILTIN_VMINFP:
15677 arg0 = gimple_call_arg (stmt, 0);
15678 arg1 = gimple_call_arg (stmt, 1);
15679 lhs = gimple_call_lhs (stmt);
15680 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15681 gimple_set_location (g, gimple_location (stmt));
15682 gsi_replace (gsi, g, true);
15684 /* flavors of vec_max. */
15685 case VSX_BUILTIN_XVMAXDP:
15686 case P8V_BUILTIN_VMAXSD:
15687 case P8V_BUILTIN_VMAXUD:
15688 case ALTIVEC_BUILTIN_VMAXSB:
15689 case ALTIVEC_BUILTIN_VMAXSH:
15690 case ALTIVEC_BUILTIN_VMAXSW:
15691 case ALTIVEC_BUILTIN_VMAXUB:
15692 case ALTIVEC_BUILTIN_VMAXUH:
15693 case ALTIVEC_BUILTIN_VMAXUW:
15694 case ALTIVEC_BUILTIN_VMAXFP:
15695 arg0 = gimple_call_arg (stmt, 0);
15696 arg1 = gimple_call_arg (stmt, 1);
15697 lhs = gimple_call_lhs (stmt);
15698 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15699 gimple_set_location (g, gimple_location (stmt));
15700 gsi_replace (gsi, g, true);
15702 /* Flavors of vec_eqv. */
15703 case P8V_BUILTIN_EQV_V16QI:
15704 case P8V_BUILTIN_EQV_V8HI:
15705 case P8V_BUILTIN_EQV_V4SI:
15706 case P8V_BUILTIN_EQV_V4SF:
15707 case P8V_BUILTIN_EQV_V2DF:
15708 case P8V_BUILTIN_EQV_V2DI:
15709 arg0 = gimple_call_arg (stmt, 0);
15710 arg1 = gimple_call_arg (stmt, 1);
15711 lhs = gimple_call_lhs (stmt);
15712 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15713 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15714 gimple_set_location (g, gimple_location (stmt));
15715 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15716 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15717 gimple_set_location (g, gimple_location (stmt));
15718 gsi_replace (gsi, g, true);
15720 /* Flavors of vec_rotate_left. */
15721 case ALTIVEC_BUILTIN_VRLB:
15722 case ALTIVEC_BUILTIN_VRLH:
15723 case ALTIVEC_BUILTIN_VRLW:
15724 case P8V_BUILTIN_VRLD:
15725 arg0 = gimple_call_arg (stmt, 0);
15726 arg1 = gimple_call_arg (stmt, 1);
15727 lhs = gimple_call_lhs (stmt);
15728 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15729 gimple_set_location (g, gimple_location (stmt));
15730 gsi_replace (gsi, g, true);
15732 /* Flavors of vector shift right algebraic.
15733 vec_sra{b,h,w} -> vsra{b,h,w}. */
15734 case ALTIVEC_BUILTIN_VSRAB:
15735 case ALTIVEC_BUILTIN_VSRAH:
15736 case ALTIVEC_BUILTIN_VSRAW:
15737 case P8V_BUILTIN_VSRAD:
15738 arg0 = gimple_call_arg (stmt, 0);
15739 arg1 = gimple_call_arg (stmt, 1);
15740 lhs = gimple_call_lhs (stmt);
15741 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
15742 gimple_set_location (g, gimple_location (stmt));
15743 gsi_replace (gsi, g, true);
15745 /* Flavors of vector shift left.
15746 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15747 case ALTIVEC_BUILTIN_VSLB:
15748 case ALTIVEC_BUILTIN_VSLH:
15749 case ALTIVEC_BUILTIN_VSLW:
15750 case P8V_BUILTIN_VSLD:
15753 gimple_seq stmts = NULL;
15754 arg0 = gimple_call_arg (stmt, 0);
15755 tree arg0_type = TREE_TYPE (arg0);
15756 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15757 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15759 arg1 = gimple_call_arg (stmt, 1);
15760 tree arg1_type = TREE_TYPE (arg1);
15761 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15762 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15763 loc = gimple_location (stmt);
15764 lhs = gimple_call_lhs (stmt);
15765 /* Force arg1 into the range valid matching the arg0 type. */
15766 /* Build a vector consisting of the max valid bit-size values. */
15767 int n_elts = VECTOR_CST_NELTS (arg1);
15768 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15770 tree element_size = build_int_cst (unsigned_element_type,
15771 tree_size_in_bits / n_elts);
15772 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15773 for (int i = 0; i < n_elts; i++)
15774 elts.safe_push (element_size);
15775 tree modulo_tree = elts.build ();
15776 /* Modulo the provided shift value against that vector. */
15777 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15778 unsigned_arg1_type, arg1);
15779 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15780 unsigned_arg1_type, unsigned_arg1,
15782 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15783 /* And finally, do the shift. */
15784 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15785 gimple_set_location (g, gimple_location (stmt));
15786 gsi_replace (gsi, g, true);
15789 /* Flavors of vector shift right. */
15790 case ALTIVEC_BUILTIN_VSRB:
15791 case ALTIVEC_BUILTIN_VSRH:
15792 case ALTIVEC_BUILTIN_VSRW:
15793 case P8V_BUILTIN_VSRD:
15795 arg0 = gimple_call_arg (stmt, 0);
15796 arg1 = gimple_call_arg (stmt, 1);
15797 lhs = gimple_call_lhs (stmt);
15798 gimple_seq stmts = NULL;
15799 /* Convert arg0 to unsigned. */
15801 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15802 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15804 = gimple_build (&stmts, RSHIFT_EXPR,
15805 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
15806 /* Convert result back to the lhs type. */
15807 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15808 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15809 update_call_from_tree (gsi, res);
15812 /* Vector loads. */
15813 case ALTIVEC_BUILTIN_LVX_V16QI:
15814 case ALTIVEC_BUILTIN_LVX_V8HI:
15815 case ALTIVEC_BUILTIN_LVX_V4SI:
15816 case ALTIVEC_BUILTIN_LVX_V4SF:
15817 case ALTIVEC_BUILTIN_LVX_V2DI:
15818 case ALTIVEC_BUILTIN_LVX_V2DF:
15819 case ALTIVEC_BUILTIN_LVX_V1TI:
15821 arg0 = gimple_call_arg (stmt, 0); // offset
15822 arg1 = gimple_call_arg (stmt, 1); // address
15823 lhs = gimple_call_lhs (stmt);
15824 location_t loc = gimple_location (stmt);
15825 /* Since arg1 may be cast to a different type, just use ptr_type_node
15826 here instead of trying to enforce TBAA on pointer types. */
15827 tree arg1_type = ptr_type_node;
15828 tree lhs_type = TREE_TYPE (lhs);
15829 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15830 the tree using the value from arg0. The resulting type will match
15831 the type of arg1. */
15832 gimple_seq stmts = NULL;
15833 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15834 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15835 arg1_type, arg1, temp_offset);
15836 /* Mask off any lower bits from the address. */
15837 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15838 arg1_type, temp_addr,
15839 build_int_cst (arg1_type, -16));
15840 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15841 if (!is_gimple_mem_ref_addr (aligned_addr))
15843 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15844 gimple *g = gimple_build_assign (t, aligned_addr);
15845 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15848 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15849 take an offset, but since we've already incorporated the offset
15850 above, here we just pass in a zero. */
15852 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15853 build_int_cst (arg1_type, 0)));
15854 gimple_set_location (g, loc);
15855 gsi_replace (gsi, g, true);
15858 /* Vector stores. */
15859 case ALTIVEC_BUILTIN_STVX_V16QI:
15860 case ALTIVEC_BUILTIN_STVX_V8HI:
15861 case ALTIVEC_BUILTIN_STVX_V4SI:
15862 case ALTIVEC_BUILTIN_STVX_V4SF:
15863 case ALTIVEC_BUILTIN_STVX_V2DI:
15864 case ALTIVEC_BUILTIN_STVX_V2DF:
15866 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15867 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15868 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15869 location_t loc = gimple_location (stmt);
15870 tree arg0_type = TREE_TYPE (arg0);
15871 /* Use ptr_type_node (no TBAA) for the arg2_type.
15872 FIXME: (Richard) "A proper fix would be to transition this type as
15873 seen from the frontend to GIMPLE, for example in a similar way we
15874 do for MEM_REFs by piggy-backing that on an extra argument, a
15875 constant zero pointer of the alias pointer type to use (which would
15876 also serve as a type indicator of the store itself). I'd use a
15877 target specific internal function for this (not sure if we can have
15878 those target specific, but I guess if it's folded away then that's
15879 fine) and get away with the overload set." */
15880 tree arg2_type = ptr_type_node;
15881 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15882 the tree using the value from arg0. The resulting type will match
15883 the type of arg2. */
15884 gimple_seq stmts = NULL;
15885 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15886 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15887 arg2_type, arg2, temp_offset);
15888 /* Mask off any lower bits from the address. */
15889 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15890 arg2_type, temp_addr,
15891 build_int_cst (arg2_type, -16));
15892 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15893 if (!is_gimple_mem_ref_addr (aligned_addr))
15895 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15896 gimple *g = gimple_build_assign (t, aligned_addr);
15897 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15900 /* The desired gimple result should be similar to:
15901 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15903 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15904 build_int_cst (arg2_type, 0)), arg0);
15905 gimple_set_location (g, loc);
15906 gsi_replace (gsi, g, true);
15910 /* unaligned Vector loads. */
15911 case VSX_BUILTIN_LXVW4X_V16QI:
15912 case VSX_BUILTIN_LXVW4X_V8HI:
15913 case VSX_BUILTIN_LXVW4X_V4SF:
15914 case VSX_BUILTIN_LXVW4X_V4SI:
15915 case VSX_BUILTIN_LXVD2X_V2DF:
15916 case VSX_BUILTIN_LXVD2X_V2DI:
15918 arg0 = gimple_call_arg (stmt, 0); // offset
15919 arg1 = gimple_call_arg (stmt, 1); // address
15920 lhs = gimple_call_lhs (stmt);
15921 location_t loc = gimple_location (stmt);
15922 /* Since arg1 may be cast to a different type, just use ptr_type_node
15923 here instead of trying to enforce TBAA on pointer types. */
15924 tree arg1_type = ptr_type_node;
15925 tree lhs_type = TREE_TYPE (lhs);
15926 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15927 required alignment (power) is 4 bytes regardless of data type. */
15928 tree align_ltype = build_aligned_type (lhs_type, 4);
15929 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15930 the tree using the value from arg0. The resulting type will match
15931 the type of arg1. */
15932 gimple_seq stmts = NULL;
15933 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15934 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15935 arg1_type, arg1, temp_offset);
15936 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15937 if (!is_gimple_mem_ref_addr (temp_addr))
15939 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15940 gimple *g = gimple_build_assign (t, temp_addr);
15941 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15944 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15945 take an offset, but since we've already incorporated the offset
15946 above, here we just pass in a zero. */
15948 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15949 build_int_cst (arg1_type, 0)));
15950 gimple_set_location (g, loc);
15951 gsi_replace (gsi, g, true);
15955 /* unaligned Vector stores. */
15956 case VSX_BUILTIN_STXVW4X_V16QI:
15957 case VSX_BUILTIN_STXVW4X_V8HI:
15958 case VSX_BUILTIN_STXVW4X_V4SF:
15959 case VSX_BUILTIN_STXVW4X_V4SI:
15960 case VSX_BUILTIN_STXVD2X_V2DF:
15961 case VSX_BUILTIN_STXVD2X_V2DI:
15963 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15964 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15965 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15966 location_t loc = gimple_location (stmt);
15967 tree arg0_type = TREE_TYPE (arg0);
15968 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15969 tree arg2_type = ptr_type_node;
15970 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15971 required alignment (power) is 4 bytes regardless of data type. */
15972 tree align_stype = build_aligned_type (arg0_type, 4);
15973 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15974 the tree using the value from arg1. */
15975 gimple_seq stmts = NULL;
15976 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15977 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15978 arg2_type, arg2, temp_offset);
15979 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15980 if (!is_gimple_mem_ref_addr (temp_addr))
15982 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15983 gimple *g = gimple_build_assign (t, temp_addr);
15984 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15988 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15989 build_int_cst (arg2_type, 0)), arg0);
15990 gimple_set_location (g, loc);
15991 gsi_replace (gsi, g, true);
15995 /* Vector Fused multiply-add (fma). */
15996 case ALTIVEC_BUILTIN_VMADDFP:
15997 case VSX_BUILTIN_XVMADDDP:
15998 case ALTIVEC_BUILTIN_VMLADDUHM:
16000 arg0 = gimple_call_arg (stmt, 0);
16001 arg1 = gimple_call_arg (stmt, 1);
16002 tree arg2 = gimple_call_arg (stmt, 2);
16003 lhs = gimple_call_lhs (stmt);
16004 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
16005 gimple_call_set_lhs (g, lhs);
16006 gimple_call_set_nothrow (g, true);
16007 gimple_set_location (g, gimple_location (stmt));
16008 gsi_replace (gsi, g, true);
16012 /* Vector compares; EQ, NE, GE, GT, LE. */
16013 case ALTIVEC_BUILTIN_VCMPEQUB:
16014 case ALTIVEC_BUILTIN_VCMPEQUH:
16015 case ALTIVEC_BUILTIN_VCMPEQUW:
16016 case P8V_BUILTIN_VCMPEQUD:
16017 fold_compare_helper (gsi, EQ_EXPR, stmt);
16020 case P9V_BUILTIN_CMPNEB:
16021 case P9V_BUILTIN_CMPNEH:
16022 case P9V_BUILTIN_CMPNEW:
16023 fold_compare_helper (gsi, NE_EXPR, stmt);
16026 case VSX_BUILTIN_CMPGE_16QI:
16027 case VSX_BUILTIN_CMPGE_U16QI:
16028 case VSX_BUILTIN_CMPGE_8HI:
16029 case VSX_BUILTIN_CMPGE_U8HI:
16030 case VSX_BUILTIN_CMPGE_4SI:
16031 case VSX_BUILTIN_CMPGE_U4SI:
16032 case VSX_BUILTIN_CMPGE_2DI:
16033 case VSX_BUILTIN_CMPGE_U2DI:
16034 fold_compare_helper (gsi, GE_EXPR, stmt);
16037 case ALTIVEC_BUILTIN_VCMPGTSB:
16038 case ALTIVEC_BUILTIN_VCMPGTUB:
16039 case ALTIVEC_BUILTIN_VCMPGTSH:
16040 case ALTIVEC_BUILTIN_VCMPGTUH:
16041 case ALTIVEC_BUILTIN_VCMPGTSW:
16042 case ALTIVEC_BUILTIN_VCMPGTUW:
16043 case P8V_BUILTIN_VCMPGTUD:
16044 case P8V_BUILTIN_VCMPGTSD:
16045 fold_compare_helper (gsi, GT_EXPR, stmt);
16048 case VSX_BUILTIN_CMPLE_16QI:
16049 case VSX_BUILTIN_CMPLE_U16QI:
16050 case VSX_BUILTIN_CMPLE_8HI:
16051 case VSX_BUILTIN_CMPLE_U8HI:
16052 case VSX_BUILTIN_CMPLE_4SI:
16053 case VSX_BUILTIN_CMPLE_U4SI:
16054 case VSX_BUILTIN_CMPLE_2DI:
16055 case VSX_BUILTIN_CMPLE_U2DI:
16056 fold_compare_helper (gsi, LE_EXPR, stmt);
16059 /* flavors of vec_splat_[us]{8,16,32}. */
16060 case ALTIVEC_BUILTIN_VSPLTISB:
16061 case ALTIVEC_BUILTIN_VSPLTISH:
16062 case ALTIVEC_BUILTIN_VSPLTISW:
16065 if (fn_code == ALTIVEC_BUILTIN_VSPLTISB)
16067 else if (fn_code == ALTIVEC_BUILTIN_VSPLTISH)
16072 arg0 = gimple_call_arg (stmt, 0);
16073 lhs = gimple_call_lhs (stmt);
16075 /* Only fold the vec_splat_*() if the lower bits of arg 0 is a
16076 5-bit signed constant in range -16 to +15. */
16077 if (TREE_CODE (arg0) != INTEGER_CST
16078 || !IN_RANGE (sext_hwi (TREE_INT_CST_LOW (arg0), size),
16081 gimple_seq stmts = NULL;
16082 location_t loc = gimple_location (stmt);
16083 tree splat_value = gimple_convert (&stmts, loc,
16084 TREE_TYPE (TREE_TYPE (lhs)), arg0);
16085 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16086 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
16087 g = gimple_build_assign (lhs, splat_tree);
16088 gimple_set_location (g, gimple_location (stmt));
16089 gsi_replace (gsi, g, true);
16093 /* Flavors of vec_splat. */
16094 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
16095 case ALTIVEC_BUILTIN_VSPLTB:
16096 case ALTIVEC_BUILTIN_VSPLTH:
16097 case ALTIVEC_BUILTIN_VSPLTW:
16098 case VSX_BUILTIN_XXSPLTD_V2DI:
16099 case VSX_BUILTIN_XXSPLTD_V2DF:
16101 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
16102 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
16103 /* Only fold the vec_splat_*() if arg1 is both a constant value and
16104 is a valid index into the arg0 vector. */
16105 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
16106 if (TREE_CODE (arg1) != INTEGER_CST
16107 || TREE_INT_CST_LOW (arg1) > (n_elts -1))
16109 lhs = gimple_call_lhs (stmt);
16110 tree lhs_type = TREE_TYPE (lhs);
16111 tree arg0_type = TREE_TYPE (arg0);
16113 if (TREE_CODE (arg0) == VECTOR_CST)
16114 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
16117 /* Determine (in bits) the length and start location of the
16118 splat value for a call to the tree_vec_extract helper. */
16119 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
16120 * BITS_PER_UNIT / n_elts;
16121 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
16122 tree len = build_int_cst (bitsizetype, splat_elem_size);
16123 tree start = build_int_cst (bitsizetype, splat_start_bit);
16124 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
16127 /* And finally, build the new vector. */
16128 tree splat_tree = build_vector_from_val (lhs_type, splat);
16129 g = gimple_build_assign (lhs, splat_tree);
16130 gimple_set_location (g, gimple_location (stmt));
16131 gsi_replace (gsi, g, true);
16135 /* vec_mergel (integrals). */
16136 case ALTIVEC_BUILTIN_VMRGLH:
16137 case ALTIVEC_BUILTIN_VMRGLW:
16138 case VSX_BUILTIN_XXMRGLW_4SI:
16139 case ALTIVEC_BUILTIN_VMRGLB:
16140 case VSX_BUILTIN_VEC_MERGEL_V2DI:
16141 case VSX_BUILTIN_XXMRGLW_4SF:
16142 case VSX_BUILTIN_VEC_MERGEL_V2DF:
16143 fold_mergehl_helper (gsi, stmt, 1);
16145 /* vec_mergeh (integrals). */
16146 case ALTIVEC_BUILTIN_VMRGHH:
16147 case ALTIVEC_BUILTIN_VMRGHW:
16148 case VSX_BUILTIN_XXMRGHW_4SI:
16149 case ALTIVEC_BUILTIN_VMRGHB:
16150 case VSX_BUILTIN_VEC_MERGEH_V2DI:
16151 case VSX_BUILTIN_XXMRGHW_4SF:
16152 case VSX_BUILTIN_VEC_MERGEH_V2DF:
16153 fold_mergehl_helper (gsi, stmt, 0);
16156 /* Flavors of vec_mergee. */
16157 case P8V_BUILTIN_VMRGEW_V4SI:
16158 case P8V_BUILTIN_VMRGEW_V2DI:
16159 case P8V_BUILTIN_VMRGEW_V4SF:
16160 case P8V_BUILTIN_VMRGEW_V2DF:
16161 fold_mergeeo_helper (gsi, stmt, 0);
16163 /* Flavors of vec_mergeo. */
16164 case P8V_BUILTIN_VMRGOW_V4SI:
16165 case P8V_BUILTIN_VMRGOW_V2DI:
16166 case P8V_BUILTIN_VMRGOW_V4SF:
16167 case P8V_BUILTIN_VMRGOW_V2DF:
16168 fold_mergeeo_helper (gsi, stmt, 1);
16171 /* d = vec_pack (a, b) */
16172 case P8V_BUILTIN_VPKUDUM:
16173 case ALTIVEC_BUILTIN_VPKUHUM:
16174 case ALTIVEC_BUILTIN_VPKUWUM:
16176 arg0 = gimple_call_arg (stmt, 0);
16177 arg1 = gimple_call_arg (stmt, 1);
16178 lhs = gimple_call_lhs (stmt);
16179 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
16180 gimple_set_location (g, gimple_location (stmt));
16181 gsi_replace (gsi, g, true);
16185 /* d = vec_unpackh (a) */
16186 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
16187 in this code is sensitive to endian-ness, and needs to be inverted to
16188 handle both LE and BE targets. */
16189 case ALTIVEC_BUILTIN_VUPKHSB:
16190 case ALTIVEC_BUILTIN_VUPKHSH:
16191 case P8V_BUILTIN_VUPKHSW:
16193 arg0 = gimple_call_arg (stmt, 0);
16194 lhs = gimple_call_lhs (stmt);
16195 if (BYTES_BIG_ENDIAN)
16196 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
16198 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
16199 gimple_set_location (g, gimple_location (stmt));
16200 gsi_replace (gsi, g, true);
16203 /* d = vec_unpackl (a) */
16204 case ALTIVEC_BUILTIN_VUPKLSB:
16205 case ALTIVEC_BUILTIN_VUPKLSH:
16206 case P8V_BUILTIN_VUPKLSW:
16208 arg0 = gimple_call_arg (stmt, 0);
16209 lhs = gimple_call_lhs (stmt);
16210 if (BYTES_BIG_ENDIAN)
16211 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
16213 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
16214 gimple_set_location (g, gimple_location (stmt));
16215 gsi_replace (gsi, g, true);
16218 /* There is no gimple type corresponding with pixel, so just return. */
16219 case ALTIVEC_BUILTIN_VUPKHPX:
16220 case ALTIVEC_BUILTIN_VUPKLPX:
16224 case ALTIVEC_BUILTIN_VPERM_16QI:
16225 case ALTIVEC_BUILTIN_VPERM_8HI:
16226 case ALTIVEC_BUILTIN_VPERM_4SI:
16227 case ALTIVEC_BUILTIN_VPERM_2DI:
16228 case ALTIVEC_BUILTIN_VPERM_4SF:
16229 case ALTIVEC_BUILTIN_VPERM_2DF:
16231 arg0 = gimple_call_arg (stmt, 0);
16232 arg1 = gimple_call_arg (stmt, 1);
16233 tree permute = gimple_call_arg (stmt, 2);
16234 lhs = gimple_call_lhs (stmt);
16235 location_t loc = gimple_location (stmt);
16236 gimple_seq stmts = NULL;
16237 // convert arg0 and arg1 to match the type of the permute
16238 // for the VEC_PERM_EXPR operation.
16239 tree permute_type = (TREE_TYPE (permute));
16240 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
16241 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
16242 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
16243 permute_type, arg0_ptype, arg1_ptype,
16245 // Convert the result back to the desired lhs type upon completion.
16246 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
16247 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16248 g = gimple_build_assign (lhs, temp);
16249 gimple_set_location (g, loc);
16250 gsi_replace (gsi, g, true);
16255 if (TARGET_DEBUG_BUILTIN)
16256 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
16257 fn_code, fn_name1, fn_name2);
16264 /* Expand an expression EXP that calls a built-in function,
16265 with result going to TARGET if that's convenient
16266 (and in mode MODE if that's convenient).
16267 SUBTARGET may be used as the target for computing one of EXP's operands.
16268 IGNORE is nonzero if the value is to be ignored. */
16271 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16272 machine_mode mode ATTRIBUTE_UNUSED,
16273 int ignore ATTRIBUTE_UNUSED)
/* Recover the builtin's function code from the CALL_EXPR and look up its
   entry in the rs6000 builtin info table.  */
16275 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16276 enum rs6000_builtins fcode
16277 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16278 size_t uns_fcode = (size_t)fcode;
16279 const struct builtin_description *d;
/* MASK is the set of ISA feature bits this builtin requires; the builtin
   is usable only when all of them are present in rs6000_builtin_mask.  */
16283 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16284 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16285 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16287 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
16288 floating point type, depending on whether long double is the IBM extended
16289 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
16290 we only define one variant of the built-in function, and switch the code
16291 when defining it, rather than defining two built-ins and using the
16292 overload table in rs6000-c.c to switch between the two. If we don't have
16293 the proper assembler, don't do this switch because CODE_FOR_*kf* and
16294 CODE_FOR_*tf* will be CODE_FOR_nothing. */
16295 if (FLOAT128_IEEE_P (TFmode))
16301 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16302 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16303 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16304 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16305 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16306 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16307 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16308 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16309 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16310 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16311 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16312 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16313 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
/* Optional debug trace: builtin name, insn code, and arity class.  */
16316 if (TARGET_DEBUG_BUILTIN)
16318 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16319 const char *name2 = (icode != CODE_FOR_nothing)
16320 ? get_insn_name ((int) icode)
/* Classify the builtin by its RS6000_BTC_TYPE_MASK bits for the trace.  */
16324 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16326 default: name3 = "unknown"; break;
16327 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16328 case RS6000_BTC_UNARY: name3 = "unary"; break;
16329 case RS6000_BTC_BINARY: name3 = "binary"; break;
16330 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16331 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16332 case RS6000_BTC_ABS: name3 = "abs"; break;
16333 case RS6000_BTC_DST: name3 = "dst"; break;
16338 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16339 (name1) ? name1 : "---", fcode,
16340 (name2) ? name2 : "---", (int) icode,
16342 func_valid_p ? "" : ", not valid");
/* Builtin not enabled for the current ISA: diagnose, then fall back to an
   ordinary library call so code generation can continue.  */
16347 rs6000_invalid_builtin (fcode);
16349 /* Given it is invalid, just generate a normal call. */
16350 return expand_call (exp, target, ignore);
/* Builtins that need hand-written, special-case expansion.  */
16355 case RS6000_BUILTIN_RECIP:
16356 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16358 case RS6000_BUILTIN_RECIPF:
16359 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16361 case RS6000_BUILTIN_RSQRTF:
16362 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16364 case RS6000_BUILTIN_RSQRT:
16365 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16367 case POWER7_BUILTIN_BPERMD:
16368 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16369 ? CODE_FOR_bpermd_di
16370 : CODE_FOR_bpermd_si), exp, target);
16372 case RS6000_BUILTIN_GET_TB:
16373 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16376 case RS6000_BUILTIN_MFTB:
16377 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16378 ? CODE_FOR_rs6000_mftb_di
16379 : CODE_FOR_rs6000_mftb_si),
16382 case RS6000_BUILTIN_MFFS:
16383 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16385 case RS6000_BUILTIN_MTFSB0:
16386 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
16388 case RS6000_BUILTIN_MTFSB1:
16389 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
16391 case RS6000_BUILTIN_SET_FPSCR_RN:
16392 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
16395 case RS6000_BUILTIN_SET_FPSCR_DRN:
16397 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
16400 case RS6000_BUILTIN_MFFSL:
16401 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
16403 case RS6000_BUILTIN_MTFSF:
16404 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16406 case RS6000_BUILTIN_CPU_INIT:
16407 case RS6000_BUILTIN_CPU_IS:
16408 case RS6000_BUILTIN_CPU_SUPPORTS:
16409 return cpu_expand_builtin (fcode, exp, target);
16411 case MISC_BUILTIN_SPEC_BARRIER:
16413 emit_insn (gen_speculation_barrier ());
16417 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16418 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
/* Select the permute-mask instruction by endianness: lvsr on big-endian,
   lvsl on little-endian.  */
16420 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16421 : (int) CODE_FOR_altivec_lvsl_direct);
16422 machine_mode tmode = insn_data[icode2].operand[0].mode;
16423 machine_mode mode = insn_data[icode2].operand[1].mode;
16427 gcc_assert (TARGET_ALTIVEC);
16429 arg = CALL_EXPR_ARG (exp, 0);
16430 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16431 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16432 addr = memory_address (mode, op);
16433 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16437 /* For the load case need to negate the address. */
16438 op = gen_reg_rtx (GET_MODE (addr));
16439 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16441 op = gen_rtx_MEM (mode, op);
/* Make sure TARGET is a register of the insn's output mode that satisfies
   the output operand's predicate; otherwise allocate a fresh one.  */
16444 || GET_MODE (target) != tmode
16445 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16446 target = gen_reg_rtx (tmode);
16448 pat = GEN_FCN (icode2) (target, op);
16456 case ALTIVEC_BUILTIN_VCFUX:
16457 case ALTIVEC_BUILTIN_VCFSX:
16458 case ALTIVEC_BUILTIN_VCTUXS:
16459 case ALTIVEC_BUILTIN_VCTSXS:
16460 /* FIXME: There's got to be a nicer way to handle this case than
16461 constructing a new CALL_EXPR. */
/* These conversions take an optional scale operand; when the user supplied
   only one argument, rebuild the call with an explicit zero scale.  */
16462 if (call_expr_nargs (exp) == 1)
16464 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16465 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16469 /* For the pack and unpack int128 routines, fix up the builtin so it
16470 uses the correct IBM128 type. */
16471 case MISC_BUILTIN_PACK_IF:
16472 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16474 icode = CODE_FOR_packtf;
16475 fcode = MISC_BUILTIN_PACK_TF;
16476 uns_fcode = (size_t)fcode;
16480 case MISC_BUILTIN_UNPACK_IF:
16481 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16483 icode = CODE_FOR_unpacktf;
16484 fcode = MISC_BUILTIN_UNPACK_TF;
16485 uns_fcode = (size_t)fcode;
/* Try the AltiVec and HTM family expanders; each reports via SUCCESS
   whether it handled the builtin.  */
16493 if (TARGET_ALTIVEC)
16495 ret = altivec_expand_builtin (exp, target, &success);
16502 ret = htm_expand_builtin (exp, target, &success);
/* Finally, fall back to the generic arity-based expansion tables.  Only
   unary/binary/ternary/no-operand builtins should reach this point.  */
16508 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16509 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16510 gcc_assert (attr == RS6000_BTC_UNARY
16511 || attr == RS6000_BTC_BINARY
16512 || attr == RS6000_BTC_TERNARY
16513 || attr == RS6000_BTC_SPECIAL);
16515 /* Handle simple unary operations. */
16517 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16518 if (d->code == fcode)
16519 return rs6000_expand_unop_builtin (icode, exp, target);
16521 /* Handle simple binary operations. */
16523 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16524 if (d->code == fcode)
16525 return rs6000_expand_binop_builtin (icode, exp, target);
16527 /* Handle simple ternary operations. */
16529 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16530 if (d->code == fcode)
16531 return rs6000_expand_ternop_builtin (icode, exp, target);
16533 /* Handle simple no-argument operations. */
16535 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16536 if (d->code == fcode)
16537 return rs6000_expand_zeroop_builtin (icode, target);
/* Every builtin should have been dispatched by one of the paths above.  */
16539 gcc_unreachable ();
16542 /* Create a builtin vector type with a name. Taking care not to give
16543 the canonical type a name. */
/* NAME is the user-visible type name (e.g. "__vector float"), ELT_TYPE the
   element type, and NUM_ELTS the number of elements in the vector.  */
16546 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16548 tree result = build_vector_type (elt_type, num_elts);
16550 /* Copy so we don't give the canonical type a name. */
16551 result = build_variant_type_copy (result);
/* Register the named variant so front ends can refer to it by NAME.  */
16553 add_builtin_type (name, result);
/* Target hook TARGET_INIT_BUILTINS for rs6000: create the AltiVec/VSX
   vector type nodes and the 128-bit floating-point type nodes, fill in
   the builtin_mode_to_type[] mapping used by builtin_function_type, and
   define the target-specific builtin functions.  */
16559 rs6000_init_builtins (void)
16565 if (TARGET_DEBUG_BUILTIN)
16566 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16567 (TARGET_ALTIVEC) ? ", altivec" : "",
16568 (TARGET_VSX) ? ", vsx" : "");
/* Signed vector types.  The "long" vs. "long long" spelling follows
   TARGET_POWERPC64, where "long" is the 64-bit type.  */
16570 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16571 : "__vector long long",
16572 intDI_type_node, 2);
16573 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16574 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16575 intSI_type_node, 4);
16576 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16577 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16578 intHI_type_node, 8);
16579 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16580 intQI_type_node, 16);
/* Unsigned counterparts of the vector types above.  */
16582 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16583 unsigned_intQI_type_node, 16);
16584 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16585 unsigned_intHI_type_node, 8);
16586 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16587 unsigned_intSI_type_node, 4);
16588 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16589 ? "__vector unsigned long"
16590 : "__vector unsigned long long",
16591 unsigned_intDI_type_node, 2);
/* Opaque vector type used for the type-overloaded __builtin_vec_*
   entry points (see altivec_init_builtins).  */
16593 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
/* const_str_type_node: pointer-to-qualified-char type for builtins
   taking string arguments (qualifier argument not visible here --
   presumably TYPE_QUAL_CONST; confirm against full source).  */
16595 const_str_type_node
16596 = build_pointer_type (build_qualified_type (char_type_node,
16599 /* We use V1TI mode as a special container to hold __int128_t items that
16600 must live in VSX registers. */
16601 if (intTI_type_node)
16603 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16604 intTI_type_node, 1);
16605 unsigned_V1TI_type_node
16606 = rs6000_vector_type ("__vector unsigned __int128",
16607 unsigned_intTI_type_node, 1);
16610 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16611 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16612 'vector unsigned short'. */
16614 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16615 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16616 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16617 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16618 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
/* Record the frontend-provided scalar type nodes in target-local
   variables for use when constructing builtin signatures.  */
16620 long_integer_type_internal_node = long_integer_type_node;
16621 long_unsigned_type_internal_node = long_unsigned_type_node;
16622 long_long_integer_type_internal_node = long_long_integer_type_node;
16623 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16624 intQI_type_internal_node = intQI_type_node;
16625 uintQI_type_internal_node = unsigned_intQI_type_node;
16626 intHI_type_internal_node = intHI_type_node;
16627 uintHI_type_internal_node = unsigned_intHI_type_node;
16628 intSI_type_internal_node = intSI_type_node;
16629 uintSI_type_internal_node = unsigned_intSI_type_node;
16630 intDI_type_internal_node = intDI_type_node;
16631 uintDI_type_internal_node = unsigned_intDI_type_node;
16632 intTI_type_internal_node = intTI_type_node;
16633 uintTI_type_internal_node = unsigned_intTI_type_node;
16634 float_type_internal_node = float_type_node;
16635 double_type_internal_node = double_type_node;
16636 long_double_type_internal_node = long_double_type_node;
16637 dfloat64_type_internal_node = dfloat64_type_node;
16638 dfloat128_type_internal_node = dfloat128_type_node;
16639 void_type_internal_node = void_type_node;
16641 /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
16642 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16643 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16644 format that uses a pair of doubles, depending on the switches and
16647 If we don't support for either 128-bit IBM double double or IEEE 128-bit
16648 floating point, we need make sure the type is non-zero or else self-test
16649 fails during bootstrap.
16651 Always create __ibm128 as a separate type, even if the current long double
16652 format is IBM extended double.
16654 For IEEE 128-bit floating point, always create the type __ieee128.  If the
16655 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16657 if (TARGET_FLOAT128_TYPE)
16659 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16660 ibm128_float_type_node = long_double_type_node;
/* Otherwise build a fresh 128-bit REAL_TYPE in IFmode for __ibm128.  */
16663 ibm128_float_type_node = make_node (REAL_TYPE);
16664 TYPE_PRECISION (ibm128_float_type_node) = 128;
16665 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16666 layout_type (ibm128_float_type_node);
16669 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16672 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16673 ieee128_float_type_node = long_double_type_node;
16675 ieee128_float_type_node = float128_type_node;
16677 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
/* No 128-bit float support: fall back to long double for both so the
   type nodes are non-null (see the comment above re: self-tests).  */
16682 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16684 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16686 builtin_mode_to_type[QImode][0] = integer_type_node;
16687 builtin_mode_to_type[HImode][0] = integer_type_node;
16688 builtin_mode_to_type[SImode][0] = intSI_type_node;
16689 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16690 builtin_mode_to_type[DImode][0] = intDI_type_node;
16691 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16692 builtin_mode_to_type[TImode][0] = intTI_type_node;
16693 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16694 builtin_mode_to_type[SFmode][0] = float_type_node;
16695 builtin_mode_to_type[DFmode][0] = double_type_node;
16696 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16697 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16698 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16699 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16700 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16701 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16702 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16703 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16704 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16705 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16706 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16707 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16708 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16709 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16710 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16711 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16712 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
/* Give the distinct __bool and __pixel scalar types their
   user-visible names.  */
16714 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16715 TYPE_NAME (bool_char_type_node) = tdecl;
16717 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16718 TYPE_NAME (bool_short_type_node) = tdecl;
16720 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16721 TYPE_NAME (bool_int_type_node) = tdecl;
16723 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16724 TYPE_NAME (pixel_type_node) = tdecl;
/* Vector variants of the __bool and __pixel types.  */
16726 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16727 bool_char_type_node, 16);
16728 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16729 bool_short_type_node, 8);
16730 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16731 bool_int_type_node, 4);
16732 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16733 ? "__vector __bool long"
16734 : "__vector __bool long long",
16735 bool_long_long_type_node, 2);
16736 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16737 pixel_type_node, 8);
16739 /* Create Altivec and VSX builtins on machines with at least the
16740 general purpose extensions (970 and newer) to allow the use of
16741 the target attribute.  */
16742 if (TARGET_EXTRA_BUILTINS)
16743 altivec_init_builtins ();
16745 htm_init_builtins ();
16747 if (TARGET_EXTRA_BUILTINS)
16748 rs6000_common_init_builtins ();
/* Scalar reciprocal-divide and reciprocal-square-root builtins, in
   double and float flavors.  */
16750 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16751 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16752 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16754 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16755 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16756 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16758 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16759 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16760 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16762 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16763 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16764 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
/* __builtin_bpermd operates on the full GPR width, hence DImode when
   compiling for a 64-bit target and SImode otherwise.  */
16766 mode = (TARGET_64BIT) ? DImode : SImode;
16767 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16768 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16769 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
/* Time-base access builtins (__builtin_ppc_get_timebase and
   __builtin_ppc_mftb).  */
16771 ftype = build_function_type_list (unsigned_intDI_type_node,
16773 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16776 ftype = build_function_type_list (unsigned_intDI_type_node,
16779 ftype = build_function_type_list (unsigned_intSI_type_node,
16781 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
/* FPSCR access builtins: mffs/mffsl read, mtfsb0/mtfsb1,
   set_fpscr_rn/set_fpscr_drn and mtfsf write.  */
16783 ftype = build_function_type_list (double_type_node, NULL_TREE);
16784 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16786 ftype = build_function_type_list (double_type_node, NULL_TREE);
16787 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16789 ftype = build_function_type_list (void_type_node,
16792 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16794 ftype = build_function_type_list (void_type_node,
16797 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16799 ftype = build_function_type_list (void_type_node,
16802 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16804 ftype = build_function_type_list (void_type_node,
16807 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16809 ftype = build_function_type_list (void_type_node,
16810 intSI_type_node, double_type_node,
16812 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
/* CPU feature-test builtins (__builtin_cpu_init/is/supports) and the
   speculation barrier.  */
16814 ftype = build_function_type_list (void_type_node, NULL_TREE);
16815 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16816 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16817 MISC_BUILTIN_SPEC_BARRIER);
16819 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16821 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16822 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16824 /* AIX libm provides clog as __clog.  */
16825 if (TARGET_XCOFF &&
16826 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16827 set_user_assembler_name (tdecl, "__clog");
16829 #ifdef SUBTARGET_INIT_BUILTINS
16830 SUBTARGET_INIT_BUILTINS;
16834 /* Returns the rs6000 builtin decl for CODE. */
/* Implements TARGET_BUILTIN_DECL.  Returns error_mark_node when CODE
   is out of range or the builtin is not enabled by the current target
   options; otherwise returns the cached decl.  INITIALIZE_P is
   unused.  */
16837 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16839 HOST_WIDE_INT fnmask;
/* Codes at or past RS6000_BUILTIN_COUNT index nothing valid.  */
16841 if (code >= RS6000_BUILTIN_COUNT)
16842 return error_mark_node;
/* Each builtin records the option mask it requires; if the current
   rs6000_builtin_mask does not cover it, diagnose and fail.  */
16844 fnmask = rs6000_builtin_info[code].mask;
16845 if ((fnmask & rs6000_builtin_mask) != fnmask)
16847 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16848 return error_mark_node;
16851 return rs6000_builtin_decls[code];
16855 altivec_init_builtins (void)
16857 const struct builtin_description *d;
16861 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16863 tree pvoid_type_node = build_pointer_type (void_type_node);
16865 tree pcvoid_type_node
16866 = build_pointer_type (build_qualified_type (void_type_node,
16869 tree int_ftype_opaque
16870 = build_function_type_list (integer_type_node,
16871 opaque_V4SI_type_node, NULL_TREE);
16872 tree opaque_ftype_opaque
16873 = build_function_type_list (integer_type_node, NULL_TREE);
16874 tree opaque_ftype_opaque_int
16875 = build_function_type_list (opaque_V4SI_type_node,
16876 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16877 tree opaque_ftype_opaque_opaque_int
16878 = build_function_type_list (opaque_V4SI_type_node,
16879 opaque_V4SI_type_node, opaque_V4SI_type_node,
16880 integer_type_node, NULL_TREE);
16881 tree opaque_ftype_opaque_opaque_opaque
16882 = build_function_type_list (opaque_V4SI_type_node,
16883 opaque_V4SI_type_node, opaque_V4SI_type_node,
16884 opaque_V4SI_type_node, NULL_TREE);
16885 tree opaque_ftype_opaque_opaque
16886 = build_function_type_list (opaque_V4SI_type_node,
16887 opaque_V4SI_type_node, opaque_V4SI_type_node,
16889 tree int_ftype_int_opaque_opaque
16890 = build_function_type_list (integer_type_node,
16891 integer_type_node, opaque_V4SI_type_node,
16892 opaque_V4SI_type_node, NULL_TREE);
16893 tree int_ftype_int_v4si_v4si
16894 = build_function_type_list (integer_type_node,
16895 integer_type_node, V4SI_type_node,
16896 V4SI_type_node, NULL_TREE);
16897 tree int_ftype_int_v2di_v2di
16898 = build_function_type_list (integer_type_node,
16899 integer_type_node, V2DI_type_node,
16900 V2DI_type_node, NULL_TREE);
16901 tree void_ftype_v4si
16902 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16903 tree v8hi_ftype_void
16904 = build_function_type_list (V8HI_type_node, NULL_TREE);
16905 tree void_ftype_void
16906 = build_function_type_list (void_type_node, NULL_TREE);
16907 tree void_ftype_int
16908 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16910 tree opaque_ftype_long_pcvoid
16911 = build_function_type_list (opaque_V4SI_type_node,
16912 long_integer_type_node, pcvoid_type_node,
16914 tree v16qi_ftype_long_pcvoid
16915 = build_function_type_list (V16QI_type_node,
16916 long_integer_type_node, pcvoid_type_node,
16918 tree v8hi_ftype_long_pcvoid
16919 = build_function_type_list (V8HI_type_node,
16920 long_integer_type_node, pcvoid_type_node,
16922 tree v4si_ftype_long_pcvoid
16923 = build_function_type_list (V4SI_type_node,
16924 long_integer_type_node, pcvoid_type_node,
16926 tree v4sf_ftype_long_pcvoid
16927 = build_function_type_list (V4SF_type_node,
16928 long_integer_type_node, pcvoid_type_node,
16930 tree v2df_ftype_long_pcvoid
16931 = build_function_type_list (V2DF_type_node,
16932 long_integer_type_node, pcvoid_type_node,
16934 tree v2di_ftype_long_pcvoid
16935 = build_function_type_list (V2DI_type_node,
16936 long_integer_type_node, pcvoid_type_node,
16938 tree v1ti_ftype_long_pcvoid
16939 = build_function_type_list (V1TI_type_node,
16940 long_integer_type_node, pcvoid_type_node,
16943 tree void_ftype_opaque_long_pvoid
16944 = build_function_type_list (void_type_node,
16945 opaque_V4SI_type_node, long_integer_type_node,
16946 pvoid_type_node, NULL_TREE);
16947 tree void_ftype_v4si_long_pvoid
16948 = build_function_type_list (void_type_node,
16949 V4SI_type_node, long_integer_type_node,
16950 pvoid_type_node, NULL_TREE);
16951 tree void_ftype_v16qi_long_pvoid
16952 = build_function_type_list (void_type_node,
16953 V16QI_type_node, long_integer_type_node,
16954 pvoid_type_node, NULL_TREE);
16956 tree void_ftype_v16qi_pvoid_long
16957 = build_function_type_list (void_type_node,
16958 V16QI_type_node, pvoid_type_node,
16959 long_integer_type_node, NULL_TREE);
16961 tree void_ftype_v8hi_long_pvoid
16962 = build_function_type_list (void_type_node,
16963 V8HI_type_node, long_integer_type_node,
16964 pvoid_type_node, NULL_TREE);
16965 tree void_ftype_v4sf_long_pvoid
16966 = build_function_type_list (void_type_node,
16967 V4SF_type_node, long_integer_type_node,
16968 pvoid_type_node, NULL_TREE);
16969 tree void_ftype_v2df_long_pvoid
16970 = build_function_type_list (void_type_node,
16971 V2DF_type_node, long_integer_type_node,
16972 pvoid_type_node, NULL_TREE);
16973 tree void_ftype_v1ti_long_pvoid
16974 = build_function_type_list (void_type_node,
16975 V1TI_type_node, long_integer_type_node,
16976 pvoid_type_node, NULL_TREE);
16977 tree void_ftype_v2di_long_pvoid
16978 = build_function_type_list (void_type_node,
16979 V2DI_type_node, long_integer_type_node,
16980 pvoid_type_node, NULL_TREE);
16981 tree int_ftype_int_v8hi_v8hi
16982 = build_function_type_list (integer_type_node,
16983 integer_type_node, V8HI_type_node,
16984 V8HI_type_node, NULL_TREE);
16985 tree int_ftype_int_v16qi_v16qi
16986 = build_function_type_list (integer_type_node,
16987 integer_type_node, V16QI_type_node,
16988 V16QI_type_node, NULL_TREE);
16989 tree int_ftype_int_v4sf_v4sf
16990 = build_function_type_list (integer_type_node,
16991 integer_type_node, V4SF_type_node,
16992 V4SF_type_node, NULL_TREE);
16993 tree int_ftype_int_v2df_v2df
16994 = build_function_type_list (integer_type_node,
16995 integer_type_node, V2DF_type_node,
16996 V2DF_type_node, NULL_TREE);
16997 tree v2di_ftype_v2di
16998 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16999 tree v4si_ftype_v4si
17000 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17001 tree v8hi_ftype_v8hi
17002 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17003 tree v16qi_ftype_v16qi
17004 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17005 tree v4sf_ftype_v4sf
17006 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17007 tree v2df_ftype_v2df
17008 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17009 tree void_ftype_pcvoid_int_int
17010 = build_function_type_list (void_type_node,
17011 pcvoid_type_node, integer_type_node,
17012 integer_type_node, NULL_TREE);
17014 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17015 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17016 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17017 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17018 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17019 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17020 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17021 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17022 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17023 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17024 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17025 ALTIVEC_BUILTIN_LVXL_V2DF);
17026 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17027 ALTIVEC_BUILTIN_LVXL_V2DI);
17028 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17029 ALTIVEC_BUILTIN_LVXL_V4SF);
17030 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17031 ALTIVEC_BUILTIN_LVXL_V4SI);
17032 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17033 ALTIVEC_BUILTIN_LVXL_V8HI);
17034 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17035 ALTIVEC_BUILTIN_LVXL_V16QI);
17036 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17037 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
17038 ALTIVEC_BUILTIN_LVX_V1TI);
17039 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17040 ALTIVEC_BUILTIN_LVX_V2DF);
17041 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17042 ALTIVEC_BUILTIN_LVX_V2DI);
17043 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17044 ALTIVEC_BUILTIN_LVX_V4SF);
17045 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17046 ALTIVEC_BUILTIN_LVX_V4SI);
17047 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17048 ALTIVEC_BUILTIN_LVX_V8HI);
17049 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17050 ALTIVEC_BUILTIN_LVX_V16QI);
17051 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17052 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17053 ALTIVEC_BUILTIN_STVX_V2DF);
17054 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17055 ALTIVEC_BUILTIN_STVX_V2DI);
17056 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17057 ALTIVEC_BUILTIN_STVX_V4SF);
17058 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17059 ALTIVEC_BUILTIN_STVX_V4SI);
17060 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17061 ALTIVEC_BUILTIN_STVX_V8HI);
17062 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17063 ALTIVEC_BUILTIN_STVX_V16QI);
17064 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17065 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17066 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17067 ALTIVEC_BUILTIN_STVXL_V2DF);
17068 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17069 ALTIVEC_BUILTIN_STVXL_V2DI);
17070 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17071 ALTIVEC_BUILTIN_STVXL_V4SF);
17072 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17073 ALTIVEC_BUILTIN_STVXL_V4SI);
17074 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17075 ALTIVEC_BUILTIN_STVXL_V8HI);
17076 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17077 ALTIVEC_BUILTIN_STVXL_V16QI);
17078 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17079 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17080 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17081 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17082 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17083 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17084 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17085 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17086 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17087 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17088 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17089 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17090 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17091 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17092 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17093 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17095 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17096 VSX_BUILTIN_LXVD2X_V2DF);
17097 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17098 VSX_BUILTIN_LXVD2X_V2DI);
17099 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17100 VSX_BUILTIN_LXVW4X_V4SF);
17101 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17102 VSX_BUILTIN_LXVW4X_V4SI);
17103 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17104 VSX_BUILTIN_LXVW4X_V8HI);
17105 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17106 VSX_BUILTIN_LXVW4X_V16QI);
17107 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17108 VSX_BUILTIN_STXVD2X_V2DF);
17109 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17110 VSX_BUILTIN_STXVD2X_V2DI);
17111 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17112 VSX_BUILTIN_STXVW4X_V4SF);
17113 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17114 VSX_BUILTIN_STXVW4X_V4SI);
17115 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17116 VSX_BUILTIN_STXVW4X_V8HI);
17117 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17118 VSX_BUILTIN_STXVW4X_V16QI);
17120 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17121 VSX_BUILTIN_LD_ELEMREV_V2DF);
17122 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17123 VSX_BUILTIN_LD_ELEMREV_V2DI);
17124 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17125 VSX_BUILTIN_LD_ELEMREV_V4SF);
17126 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17127 VSX_BUILTIN_LD_ELEMREV_V4SI);
17128 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17129 VSX_BUILTIN_LD_ELEMREV_V8HI);
17130 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17131 VSX_BUILTIN_LD_ELEMREV_V16QI);
17132 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17133 VSX_BUILTIN_ST_ELEMREV_V2DF);
17134 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
17135 VSX_BUILTIN_ST_ELEMREV_V1TI);
17136 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17137 VSX_BUILTIN_ST_ELEMREV_V2DI);
17138 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17139 VSX_BUILTIN_ST_ELEMREV_V4SF);
17140 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17141 VSX_BUILTIN_ST_ELEMREV_V4SI);
17142 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
17143 VSX_BUILTIN_ST_ELEMREV_V8HI);
17144 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
17145 VSX_BUILTIN_ST_ELEMREV_V16QI);
17147 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17148 VSX_BUILTIN_VEC_LD);
17149 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17150 VSX_BUILTIN_VEC_ST);
17151 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17152 VSX_BUILTIN_VEC_XL);
17153 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
17154 VSX_BUILTIN_VEC_XL_BE);
17155 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17156 VSX_BUILTIN_VEC_XST);
17157 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
17158 VSX_BUILTIN_VEC_XST_BE);
17160 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17161 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17162 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17164 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17165 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17166 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17167 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17168 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17169 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17170 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17171 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17172 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17173 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17174 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17175 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17177 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17178 ALTIVEC_BUILTIN_VEC_ADDE);
17179 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17180 ALTIVEC_BUILTIN_VEC_ADDEC);
17181 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17182 ALTIVEC_BUILTIN_VEC_CMPNE);
17183 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17184 ALTIVEC_BUILTIN_VEC_MUL);
17185 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
17186 ALTIVEC_BUILTIN_VEC_SUBE);
17187 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
17188 ALTIVEC_BUILTIN_VEC_SUBEC);
17190 /* Cell builtins. */
17191 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17192 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17193 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17194 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17196 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17197 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17198 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17199 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17201 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17202 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17203 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17204 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17206 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17207 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17208 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17209 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17211 if (TARGET_P9_VECTOR)
17213 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17214 P9V_BUILTIN_STXVL);
17215 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
17216 P9V_BUILTIN_XST_LEN_R);
17219 /* Add the DST variants. */
17221 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17223 HOST_WIDE_INT mask = d->mask;
17225 /* It is expected that these dst built-in functions may have
17226 d->icode equal to CODE_FOR_nothing. */
17227 if ((mask & builtin_mask) != mask)
17229 if (TARGET_DEBUG_BUILTIN)
17230 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17234 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17237 /* Initialize the predicates. */
17238 d = bdesc_altivec_preds;
17239 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17241 machine_mode mode1;
17243 HOST_WIDE_INT mask = d->mask;
17245 if ((mask & builtin_mask) != mask)
17247 if (TARGET_DEBUG_BUILTIN)
17248 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17253 if (rs6000_overloaded_builtin_p (d->code))
17257 /* Cannot define builtin if the instruction is disabled. */
17258 gcc_assert (d->icode != CODE_FOR_nothing);
17259 mode1 = insn_data[d->icode].operand[1].mode;
17265 type = int_ftype_int_opaque_opaque;
17268 type = int_ftype_int_v2di_v2di;
17271 type = int_ftype_int_v4si_v4si;
17274 type = int_ftype_int_v8hi_v8hi;
17277 type = int_ftype_int_v16qi_v16qi;
17280 type = int_ftype_int_v4sf_v4sf;
17283 type = int_ftype_int_v2df_v2df;
17286 gcc_unreachable ();
17289 def_builtin (d->name, type, d->code);
17292 /* Initialize the abs* operators. */
17294 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17296 machine_mode mode0;
17298 HOST_WIDE_INT mask = d->mask;
17300 if ((mask & builtin_mask) != mask)
17302 if (TARGET_DEBUG_BUILTIN)
17303 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17308 /* Cannot define builtin if the instruction is disabled. */
17309 gcc_assert (d->icode != CODE_FOR_nothing);
17310 mode0 = insn_data[d->icode].operand[0].mode;
17315 type = v2di_ftype_v2di;
17318 type = v4si_ftype_v4si;
17321 type = v8hi_ftype_v8hi;
17324 type = v16qi_ftype_v16qi;
17327 type = v4sf_ftype_v4sf;
17330 type = v2df_ftype_v2df;
17333 gcc_unreachable ();
17336 def_builtin (d->name, type, d->code);
17339 /* Initialize target builtin that implements
17340 targetm.vectorize.builtin_mask_for_load. */
17342 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17343 v16qi_ftype_long_pcvoid,
17344 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17345 BUILT_IN_MD, NULL, NULL_TREE);
17346 TREE_READONLY (decl) = 1;
17347 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17348 altivec_builtin_mask_for_load = decl;
17350 /* Access to the vec_init patterns. */
17351 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17352 integer_type_node, integer_type_node,
17353 integer_type_node, NULL_TREE);
17354 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17356 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17357 short_integer_type_node,
17358 short_integer_type_node,
17359 short_integer_type_node,
17360 short_integer_type_node,
17361 short_integer_type_node,
17362 short_integer_type_node,
17363 short_integer_type_node, NULL_TREE);
17364 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17366 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17367 char_type_node, char_type_node,
17368 char_type_node, char_type_node,
17369 char_type_node, char_type_node,
17370 char_type_node, char_type_node,
17371 char_type_node, char_type_node,
17372 char_type_node, char_type_node,
17373 char_type_node, char_type_node,
17374 char_type_node, NULL_TREE);
17375 def_builtin ("__builtin_vec_init_v16qi", ftype,
17376 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17378 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17379 float_type_node, float_type_node,
17380 float_type_node, NULL_TREE);
17381 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17383 /* VSX builtins. */
17384 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17385 double_type_node, NULL_TREE);
17386 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17388 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17389 intDI_type_node, NULL_TREE);
17390 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17392 /* Access to the vec_set patterns. */
17393 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17395 integer_type_node, NULL_TREE);
17396 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17398 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17400 integer_type_node, NULL_TREE);
17401 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17403 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17405 integer_type_node, NULL_TREE);
17406 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17408 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17410 integer_type_node, NULL_TREE);
17411 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17413 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17415 integer_type_node, NULL_TREE);
17416 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17418 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17420 integer_type_node, NULL_TREE);
17421 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17423 /* Access to the vec_extract patterns. */
17424 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17425 integer_type_node, NULL_TREE);
17426 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17428 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17429 integer_type_node, NULL_TREE);
17430 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17432 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17433 integer_type_node, NULL_TREE);
17434 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17436 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17437 integer_type_node, NULL_TREE);
17438 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17440 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17441 integer_type_node, NULL_TREE);
17442 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17444 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17445 integer_type_node, NULL_TREE);
17446 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17449 if (V1TI_type_node)
17451 tree v1ti_ftype_long_pcvoid
17452 = build_function_type_list (V1TI_type_node,
17453 long_integer_type_node, pcvoid_type_node,
17455 tree void_ftype_v1ti_long_pvoid
17456 = build_function_type_list (void_type_node,
17457 V1TI_type_node, long_integer_type_node,
17458 pvoid_type_node, NULL_TREE);
17459 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17460 VSX_BUILTIN_LD_ELEMREV_V1TI);
17461 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17462 VSX_BUILTIN_LXVD2X_V1TI);
17463 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17464 VSX_BUILTIN_STXVD2X_V1TI);
17465 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17466 NULL_TREE, NULL_TREE);
17467 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17468 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17470 integer_type_node, NULL_TREE);
17471 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17472 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17473 integer_type_node, NULL_TREE);
17474 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
/* Create the HTM (Hardware Transactional Memory) built-in functions.
   For each entry in bdesc_htm, a function type is assembled from the
   attribute flags recorded in rs6000_builtin_info and registered via
   def_builtin.  NOTE(review): several interior lines (declarations of
   rettype/argtype/nopnds, else branches, continue statements) are not
   visible in this excerpt.  */
17480 htm_init_builtins (void)
17482 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17483 const struct builtin_description *d;
17487 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17489 tree op[MAX_HTM_OPERANDS], type;
17490 HOST_WIDE_INT mask = d->mask;
17491 unsigned attr = rs6000_builtin_info[d->code].attr;
17492 bool void_func = (attr & RS6000_BTC_VOID);
17493 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17495 tree gpr_type_node;
17499 /* It is expected that these htm built-in functions may have
17500 d->icode equal to CODE_FOR_nothing. */
/* Pick the C type matching the width of a GPR: 64-bit even under a
   32-bit ABI when 64-bit instructions are available.  */
17502 if (TARGET_32BIT && TARGET_POWERPC64)
17503 gpr_type_node = long_long_unsigned_type_node;
17505 gpr_type_node = long_unsigned_type_node;
/* SPR accessors take and return full GPR values; TABORTDC/TABORTDCI
   take GPR-sized arguments but return a plain unsigned; everything
   else uses plain unsigned for both.  */
17507 if (attr & RS6000_BTC_SPR)
17509 rettype = gpr_type_node;
17510 argtype = gpr_type_node;
17512 else if (d->code == HTM_BUILTIN_TABORTDC
17513 || d->code == HTM_BUILTIN_TABORTDCI)
17515 rettype = unsigned_type_node;
17516 argtype = gpr_type_node;
17520 rettype = unsigned_type_node;
17521 argtype = unsigned_type_node;
/* Skip builtins whose required ISA feature bits are not enabled.  */
17524 if ((mask & builtin_mask) != mask)
17526 if (TARGET_DEBUG_BUILTIN)
17527 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17533 if (TARGET_DEBUG_BUILTIN)
17534 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17535 (long unsigned) i);
/* op[0] is the return type; void builtins return void regardless of
   the rettype computed above.  */
17539 op[nopnds++] = (void_func) ? void_type_node : rettype;
17541 if (attr_args == RS6000_BTC_UNARY)
17542 op[nopnds++] = argtype;
17543 else if (attr_args == RS6000_BTC_BINARY)
17545 op[nopnds++] = argtype;
17546 op[nopnds++] = argtype;
17548 else if (attr_args == RS6000_BTC_TERNARY)
17550 op[nopnds++] = argtype;
17551 op[nopnds++] = argtype;
17552 op[nopnds++] = argtype;
/* Build the FUNCTION_TYPE from the collected operand types; the
   dispatch on operand count is presumably a switch on nopnds —
   missing lines, TODO confirm.  */
17558 type = build_function_type_list (op[0], NULL_TREE);
17561 type = build_function_type_list (op[0], op[1], NULL_TREE);
17564 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17567 type = build_function_type_list (op[0], op[1], op[2], op[3],
17571 gcc_unreachable ();
17574 def_builtin (d->name, type, d->code);
17578 /* Hash function for builtin functions with up to 3 arguments and a return
/* Fold the four machine modes and their signedness flags into a single
   hash value.  Index 0 describes the return value, indices 1..3 the
   arguments; unused slots hold VOIDmode so they hash consistently.  */
17581 builtin_hasher::hash (builtin_hash_struct *bh)
17586 for (i = 0; i < 4; i++)
17588 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17589 ret = (ret * 2) + bh->uns_p[i];
17595 /* Compare builtin hash entries H1 and H2 for equivalence. */
/* Two entries are equal iff all four machine modes and all four
   signedness flags match — the same fields hash () folds together.  */
17597 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17599 return ((p1->mode[0] == p2->mode[0])
17600 && (p1->mode[1] == p2->mode[1])
17601 && (p1->mode[2] == p2->mode[2])
17602 && (p1->mode[3] == p2->mode[3])
17603 && (p1->uns_p[0] == p2->uns_p[0])
17604 && (p1->uns_p[1] == p2->uns_p[1])
17605 && (p1->uns_p[2] == p2->uns_p[2])
17606 && (p1->uns_p[3] == p2->uns_p[3]));
17609 /* Map types for builtin functions with an explicit return type and up to 3
17610 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17611 of the argument. */
/* Return a (memoized) FUNCTION_TYPE for the given return/argument machine
   modes.  Results are cached in builtin_hash_table, keyed on the four
   modes plus their signedness flags; NAME and BUILTIN are used for
   diagnostics and for the signedness special-casing below.
   NOTE(review): the lines that set h.uns_p[] inside each switch group
   are not visible in this excerpt — the group comments below describe
   the presumed effect; confirm against the full file.  */
17613 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17614 machine_mode mode_arg1, machine_mode mode_arg2,
17615 enum rs6000_builtins builtin, const char *name)
17617 struct builtin_hash_struct h;
17618 struct builtin_hash_struct *h2;
17621 tree ret_type = NULL_TREE;
17622 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17624 /* Create builtin_hash_table. */
17625 if (builtin_hash_table == NULL)
17626 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
/* Slot 0 is the return value; slots 1..3 are the arguments.  */
17628 h.type = NULL_TREE;
17629 h.mode[0] = mode_ret;
17630 h.mode[1] = mode_arg0;
17631 h.mode[2] = mode_arg1;
17632 h.mode[3] = mode_arg2;
17638 /* If the builtin is a type that produces unsigned results or takes unsigned
17639 arguments, and it is returned as a decl for the vectorizer (such as
17640 widening multiplies, permute), make sure the arguments and return value
17641 are type correct. */
17644 /* unsigned 1 argument functions. */
17645 case CRYPTO_BUILTIN_VSBOX:
17646 case P8V_BUILTIN_VGBBD:
17647 case MISC_BUILTIN_CDTBCD:
17648 case MISC_BUILTIN_CBCDTD:
17653 /* unsigned 2 argument functions. */
17654 case ALTIVEC_BUILTIN_VMULEUB:
17655 case ALTIVEC_BUILTIN_VMULEUH:
17656 case P8V_BUILTIN_VMULEUW:
17657 case ALTIVEC_BUILTIN_VMULOUB:
17658 case ALTIVEC_BUILTIN_VMULOUH:
17659 case P8V_BUILTIN_VMULOUW:
17660 case CRYPTO_BUILTIN_VCIPHER:
17661 case CRYPTO_BUILTIN_VCIPHERLAST:
17662 case CRYPTO_BUILTIN_VNCIPHER:
17663 case CRYPTO_BUILTIN_VNCIPHERLAST:
17664 case CRYPTO_BUILTIN_VPMSUMB:
17665 case CRYPTO_BUILTIN_VPMSUMH:
17666 case CRYPTO_BUILTIN_VPMSUMW:
17667 case CRYPTO_BUILTIN_VPMSUMD:
17668 case CRYPTO_BUILTIN_VPMSUM:
17669 case MISC_BUILTIN_ADDG6S:
17670 case MISC_BUILTIN_DIVWEU:
17671 case MISC_BUILTIN_DIVDEU:
17672 case VSX_BUILTIN_UDIV_V2DI:
17673 case ALTIVEC_BUILTIN_VMAXUB:
17674 case ALTIVEC_BUILTIN_VMINUB:
17675 case ALTIVEC_BUILTIN_VMAXUH:
17676 case ALTIVEC_BUILTIN_VMINUH:
17677 case ALTIVEC_BUILTIN_VMAXUW:
17678 case ALTIVEC_BUILTIN_VMINUW:
17679 case P8V_BUILTIN_VMAXUD:
17680 case P8V_BUILTIN_VMINUD:
17686 /* unsigned 3 argument functions. */
17687 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17688 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17689 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17690 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17691 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17692 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17693 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17694 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17695 case VSX_BUILTIN_VPERM_16QI_UNS:
17696 case VSX_BUILTIN_VPERM_8HI_UNS:
17697 case VSX_BUILTIN_VPERM_4SI_UNS:
17698 case VSX_BUILTIN_VPERM_2DI_UNS:
17699 case VSX_BUILTIN_XXSEL_16QI_UNS:
17700 case VSX_BUILTIN_XXSEL_8HI_UNS:
17701 case VSX_BUILTIN_XXSEL_4SI_UNS:
17702 case VSX_BUILTIN_XXSEL_2DI_UNS:
17703 case CRYPTO_BUILTIN_VPERMXOR:
17704 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17705 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17706 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17707 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17708 case CRYPTO_BUILTIN_VSHASIGMAW:
17709 case CRYPTO_BUILTIN_VSHASIGMAD:
17710 case CRYPTO_BUILTIN_VSHASIGMA:
17717 /* signed permute functions with unsigned char mask. */
17718 case ALTIVEC_BUILTIN_VPERM_16QI:
17719 case ALTIVEC_BUILTIN_VPERM_8HI:
17720 case ALTIVEC_BUILTIN_VPERM_4SI:
17721 case ALTIVEC_BUILTIN_VPERM_4SF:
17722 case ALTIVEC_BUILTIN_VPERM_2DI:
17723 case ALTIVEC_BUILTIN_VPERM_2DF:
17724 case VSX_BUILTIN_VPERM_16QI:
17725 case VSX_BUILTIN_VPERM_8HI:
17726 case VSX_BUILTIN_VPERM_4SI:
17727 case VSX_BUILTIN_VPERM_4SF:
17728 case VSX_BUILTIN_VPERM_2DI:
17729 case VSX_BUILTIN_VPERM_2DF:
17733 /* unsigned args, signed return. */
17734 case VSX_BUILTIN_XVCVUXDSP:
17735 case VSX_BUILTIN_XVCVUXDDP_UNS:
17736 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17740 /* signed args, unsigned return. */
17741 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17742 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17743 case MISC_BUILTIN_UNPACK_TD:
17744 case MISC_BUILTIN_UNPACK_V1TI:
17748 /* unsigned arguments, bool return (compares). */
17749 case ALTIVEC_BUILTIN_VCMPEQUB:
17750 case ALTIVEC_BUILTIN_VCMPEQUH:
17751 case ALTIVEC_BUILTIN_VCMPEQUW:
17752 case P8V_BUILTIN_VCMPEQUD:
17753 case VSX_BUILTIN_CMPGE_U16QI:
17754 case VSX_BUILTIN_CMPGE_U8HI:
17755 case VSX_BUILTIN_CMPGE_U4SI:
17756 case VSX_BUILTIN_CMPGE_U2DI:
17757 case ALTIVEC_BUILTIN_VCMPGTUB:
17758 case ALTIVEC_BUILTIN_VCMPGTUH:
17759 case ALTIVEC_BUILTIN_VCMPGTUW:
17760 case P8V_BUILTIN_VCMPGTUD:
17765 /* unsigned arguments for 128-bit pack instructions. */
17766 case MISC_BUILTIN_PACK_TD:
17767 case MISC_BUILTIN_PACK_V1TI:
17772 /* unsigned second arguments (vector shift right). */
17773 case ALTIVEC_BUILTIN_VSRB:
17774 case ALTIVEC_BUILTIN_VSRH:
17775 case ALTIVEC_BUILTIN_VSRW:
17776 case P8V_BUILTIN_VSRD:
/* Trailing VOIDmode slots mean "argument not present".  */
17784 /* Figure out how many args are present. */
17785 while (num_args > 0 && h.mode[num_args] == VOIDmode)
/* Map each mode to a tree type; if no signed variant exists for a
   mode flagged unsigned, fall back to the signed table entry.  */
17788 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17789 if (!ret_type && h.uns_p[0])
17790 ret_type = builtin_mode_to_type[h.mode[0]][0];
/* A missing return type is a backend configuration bug, not a user
   error — report and stop.  */
17793 fatal_error (input_location,
17794 "internal error: builtin function %qs had an unexpected "
17795 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17797 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17798 arg_type[i] = NULL_TREE;
17800 for (i = 0; i < num_args; i++)
17802 int m = (int) h.mode[i+1];
17803 int uns_p = h.uns_p[i+1];
17805 arg_type[i] = builtin_mode_to_type[m][uns_p];
17806 if (!arg_type[i] && uns_p)
17807 arg_type[i] = builtin_mode_to_type[m][0];
17810 fatal_error (input_location,
17811 "internal error: builtin function %qs, argument %d "
17812 "had unexpected argument type %qs", name, i,
17813 GET_MODE_NAME (m));
/* Look up or create the cache entry; the type is built only on a
   cache miss.  */
17816 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17817 if (*found == NULL)
17819 h2 = ggc_alloc<builtin_hash_struct> ();
17823 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17824 arg_type[2], NULL_TREE);
17827 return (*found)->type;
/* Register the "common" (Altivec/VSX shared) built-in functions: the
   ternary, binary, unary, and no-argument operator tables.  Overloaded
   builtins get opaque-vector types; concrete ones derive their type
   from the insn's operand modes via builtin_function_type.  */
17831 rs6000_common_init_builtins (void)
17833 const struct builtin_description *d;
/* Lazily-built shared types for the overloaded builtins.  */
17836 tree opaque_ftype_opaque = NULL_TREE;
17837 tree opaque_ftype_opaque_opaque = NULL_TREE;
17838 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17839 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17841 /* Create Altivec and VSX builtins on machines with at least the
17842 general purpose extensions (970 and newer) to allow the use of
17843 the target attribute. */
17845 if (TARGET_EXTRA_BUILTINS)
17846 builtin_mask |= RS6000_BTM_COMMON;
17848 /* Add the ternary operators. */
17850 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17853 HOST_WIDE_INT mask = d->mask;
/* Skip entries whose required ISA bits are not enabled.  */
17855 if ((mask & builtin_mask) != mask)
17857 if (TARGET_DEBUG_BUILTIN)
17858 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
/* Overloaded builtins all share one opaque (V4SI,V4SI,V4SI)->V4SI
   type, built on first use.  */
17862 if (rs6000_overloaded_builtin_p (d->code))
17864 if (! (type = opaque_ftype_opaque_opaque_opaque))
17865 type = opaque_ftype_opaque_opaque_opaque
17866 = build_function_type_list (opaque_V4SI_type_node,
17867 opaque_V4SI_type_node,
17868 opaque_V4SI_type_node,
17869 opaque_V4SI_type_node,
17874 enum insn_code icode = d->icode;
17877 if (TARGET_DEBUG_BUILTIN)
17878 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
/* Cannot derive a type from a non-existent instruction.  */
17884 if (icode == CODE_FOR_nothing)
17886 if (TARGET_DEBUG_BUILTIN)
17887 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
/* Operand 0 is the output; operands 1..3 are the inputs.  */
17893 type = builtin_function_type (insn_data[icode].operand[0].mode,
17894 insn_data[icode].operand[1].mode,
17895 insn_data[icode].operand[2].mode,
17896 insn_data[icode].operand[3].mode,
17900 def_builtin (d->name, type, d->code);
17903 /* Add the binary operators. */
17905 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17907 machine_mode mode0, mode1, mode2;
17909 HOST_WIDE_INT mask = d->mask;
17911 if ((mask & builtin_mask) != mask)
17913 if (TARGET_DEBUG_BUILTIN)
17914 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17918 if (rs6000_overloaded_builtin_p (d->code))
17920 if (! (type = opaque_ftype_opaque_opaque))
17921 type = opaque_ftype_opaque_opaque
17922 = build_function_type_list (opaque_V4SI_type_node,
17923 opaque_V4SI_type_node,
17924 opaque_V4SI_type_node,
17929 enum insn_code icode = d->icode;
17932 if (TARGET_DEBUG_BUILTIN)
17933 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17939 if (icode == CODE_FOR_nothing)
17941 if (TARGET_DEBUG_BUILTIN)
17942 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17948 mode0 = insn_data[icode].operand[0].mode;
17949 mode1 = insn_data[icode].operand[1].mode;
17950 mode2 = insn_data[icode].operand[2].mode;
/* Binary builtins have no third argument: pass VOIDmode.  */
17952 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17956 def_builtin (d->name, type, d->code);
17959 /* Add the simple unary operators. */
17961 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17963 machine_mode mode0, mode1;
17965 HOST_WIDE_INT mask = d->mask;
17967 if ((mask & builtin_mask) != mask)
17969 if (TARGET_DEBUG_BUILTIN)
17970 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17974 if (rs6000_overloaded_builtin_p (d->code))
17976 if (! (type = opaque_ftype_opaque))
17977 type = opaque_ftype_opaque
17978 = build_function_type_list (opaque_V4SI_type_node,
17979 opaque_V4SI_type_node,
17984 enum insn_code icode = d->icode;
17987 if (TARGET_DEBUG_BUILTIN)
17988 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17994 if (icode == CODE_FOR_nothing)
17996 if (TARGET_DEBUG_BUILTIN)
17997 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18003 mode0 = insn_data[icode].operand[0].mode;
18004 mode1 = insn_data[icode].operand[1].mode;
18006 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18010 def_builtin (d->name, type, d->code);
18013 /* Add the simple no-argument operators. */
18015 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18017 machine_mode mode0;
18019 HOST_WIDE_INT mask = d->mask;
18021 if ((mask & builtin_mask) != mask)
18023 if (TARGET_DEBUG_BUILTIN)
18024 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18027 if (rs6000_overloaded_builtin_p (d->code))
18029 if (!opaque_ftype_opaque)
18030 opaque_ftype_opaque
18031 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE)
18032 type = opaque_ftype_opaque;
18036 enum insn_code icode = d->icode;
18039 if (TARGET_DEBUG_BUILTIN)
18040 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18041 (long unsigned) i);
18044 if (icode == CODE_FOR_nothing)
18046 if (TARGET_DEBUG_BUILTIN)
18048 "rs6000_builtin, skip no-argument %s (no code)\n",
18052 mode0 = insn_data[icode].operand[0].mode;
18053 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18056 def_builtin (d->name, type, d->code);
18060 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
/* Register library-function names for MODE, an IBM extended-double
   (double-double) 128-bit float mode (IFmode or TFmode).  Under
   -mxl-compat the IBM XL compiler's _xlq* names are used instead of
   GCC's __gcc_q* names.  */
18062 init_float128_ibm (machine_mode mode)
18064 if (!TARGET_XL_COMPAT)
18066 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18067 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18068 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18069 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
/* Soft-float targets also need libcalls for negation, comparisons,
   and the mode conversions below.  */
18071 if (!TARGET_HARD_FLOAT)
18073 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18074 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18075 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18076 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18077 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18078 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18079 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18080 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18082 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18083 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18084 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18085 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18086 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18087 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18088 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18089 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
/* -mxl-compat: use the IBM XL compiler's runtime names.  */
18094 set_optab_libfunc (add_optab, mode, "_xlqadd");
18095 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18096 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18097 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18100 /* Add various conversions for IFmode to use the traditional TFmode
18102 if (mode == IFmode)
18104 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
18105 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
18106 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
18107 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
18108 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
18109 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
/* 128-bit integer conversions are only available on 64-bit.  */
18111 if (TARGET_POWERPC64)
18113 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18114 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18115 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18116 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18121 /* Create a decl for either complex long double multiply or complex long double
18122 divide when long double is IEEE 128-bit floating point. We can't use
18123 __multc3 and __divtc3 because the original long double using IBM extended
18124 double used those names. The complex multiply/divide functions are encoded
18125 as builtin functions with a complex result and 4 scalar inputs. */
18128 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
/* Register NAME as the implementation of the standard builtin FNCODE,
   both as a user-callable builtin and as the implicit decl the
   expander uses (set_builtin_decl with implicit_p = true).  */
18130 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
18133 set_builtin_decl (fncode, fndecl, true);
18135 if (TARGET_DEBUG_BUILTIN)
18136 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
18141 /* Set up IEEE 128-bit floating point routines. Use different names if the
18142 arguments can be passed in a vector register. The historical PowerPC
18143 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18144 continue to use that if we aren't using vector registers to pass IEEE
18145 128-bit floating point. */
18148 init_float128_ieee (machine_mode mode)
/* Vector-register IEEE 128-bit uses the *kf* libgcc names.  */
18150 if (FLOAT128_VECTOR_P (mode))
/* Guard so the complex mul/div builtins are created only once even
   when this runs again for clone/target attributes.  */
18152 static bool complex_muldiv_init_p = false;
18154 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
18155 we have clone or target attributes, this will be called a second
18156 time. We want to create the built-in function only once. */
18157 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
18159 complex_muldiv_init_p = true;
/* Derive the BUILT_IN_COMPLEX_{MUL,DIV}_<mode> codes for TCmode from
   the per-mode builtin ranges.  */
18160 built_in_function fncode_mul =
18161 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
18162 - MIN_MODE_COMPLEX_FLOAT);
18163 built_in_function fncode_div =
18164 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
18165 - MIN_MODE_COMPLEX_FLOAT);
/* Complex result from 4 scalar inputs (re1, im1, re2, im2).  */
18167 tree fntype = build_function_type_list (complex_long_double_type_node,
18168 long_double_type_node,
18169 long_double_type_node,
18170 long_double_type_node,
18171 long_double_type_node,
18174 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
18175 create_complex_muldiv ("__divkc3", fncode_div, fntype);
18178 set_optab_libfunc (add_optab, mode, "__addkf3");
18179 set_optab_libfunc (sub_optab, mode, "__subkf3");
18180 set_optab_libfunc (neg_optab, mode, "__negkf2");
18181 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18182 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18183 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18184 set_optab_libfunc (abs_optab, mode, "__abskf2");
18185 set_optab_libfunc (powi_optab, mode, "__powikf2");
18187 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18188 set_optab_libfunc (ne_optab, mode, "__nekf2");
18189 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18190 set_optab_libfunc (ge_optab, mode, "__gekf2");
18191 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18192 set_optab_libfunc (le_optab, mode, "__lekf2");
18193 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18195 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18196 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18197 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18198 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
/* IBM double-double <-> IEEE 128-bit: note the optab direction is
   intentionally "backwards" (sext used for the IBM->IEEE name) since
   neither format is a subset of the other.  */
18200 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
18201 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18202 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
18204 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
18205 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18206 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
18208 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
18209 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
18210 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
18211 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
18212 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
18213 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
18215 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18216 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18217 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18218 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18220 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18221 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18222 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18223 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18225 if (TARGET_POWERPC64)
18227 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18228 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18229 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18230 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
/* Non-vector path: historical 32-bit SVR4 _q_<op> names.  */
18236 set_optab_libfunc (add_optab, mode, "_q_add");
18237 set_optab_libfunc (sub_optab, mode, "_q_sub");
18238 set_optab_libfunc (neg_optab, mode, "_q_neg");
18239 set_optab_libfunc (smul_optab, mode, "_q_mul");
18240 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18241 if (TARGET_PPC_GPOPT)
18242 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18244 set_optab_libfunc (eq_optab, mode, "_q_feq");
18245 set_optab_libfunc (ne_optab, mode, "_q_fne");
18246 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18247 set_optab_libfunc (ge_optab, mode, "_q_fge");
18248 set_optab_libfunc (lt_optab, mode, "_q_flt");
18249 set_optab_libfunc (le_optab, mode, "_q_fle");
18251 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18252 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18253 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18254 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18255 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18256 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18257 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18258 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
/* Target hook: register all 128-bit floating point library functions.
   IFmode/KFmode are set up whenever the __float128 machinery exists;
   TFmode (long double) gets the IBM or IEEE flavor depending on
   -mabi={ibm,ieee}longdouble.  */
18263 rs6000_init_libfuncs (void)
18265 /* __float128 support. */
18266 if (TARGET_FLOAT128_TYPE)
18268 init_float128_ibm (IFmode);
18269 init_float128_ieee (KFmode);
18272 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18273 if (TARGET_LONG_DOUBLE_128)
18275 if (!TARGET_IEEEQUAD)
18276 init_float128_ibm (TFmode);
18278 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18280 init_float128_ieee (TFmode);
18284 /* Emit a potentially record-form instruction, setting DST from SRC.
18285 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18286 signed comparison of DST with zero. If DOT is 1, the generated RTL
18287 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18288 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18289 a separate COMPARE. */
18292 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
/* dot == 0: a plain move, no condition register involved.  */
18296 emit_move_insn (dst, src);
/* Record-form (dot) insns can only set CR0; for any other CC register
   fall back to a move plus an explicit compare.  */
18300 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18302 emit_move_insn (dst, src);
18303 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
/* CR0: emit one PARALLEL.  dot == 1 discards DST (CLOBBER);
   dot == 2 keeps it (SET).  */
18307 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18310 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18311 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18315 rtx set = gen_rtx_SET (dst, src);
18316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
18321 /* A validation routine: say whether CODE, a condition code, and MODE
18322 match. The other alternatives either don't make sense or should
18323 never be generated. */
18326 validate_condition_mode (enum rtx_code code, machine_mode mode)
/* CODE must be a comparison and MODE a condition-code mode.  */
18328 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18329 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18330 && GET_MODE_CLASS (mode) == MODE_CC);
18332 /* These don't make sense. */
/* Signed orderings may not use the unsigned CC mode, and vice versa.  */
18333 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18334 || mode != CCUNSmode);
18336 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18337 || mode == CCUNSmode);
/* Unordered-aware comparisons require the floating-point CC mode.  */
18339 gcc_assert (mode == CCFPmode
18340 || (code != ORDERED && code != UNORDERED
18341 && code != UNEQ && code != LTGT
18342 && code != UNGT && code != UNLT
18343 && code != UNGE && code != UNLE));
18345 /* These should never be generated except for
18346 flag_finite_math_only. */
18347 gcc_assert (mode != CCFPmode
18348 || flag_finite_math_only
18349 || (code != LE && code != GE
18350 && code != UNEQ && code != LTGT
18351 && code != UNGT && code != UNLT));
18353 /* These are invalid; the information is not there. */
/* CCEQmode only records equality, so only EQ/NE can read it.  */
18354 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18358 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18359 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18360 not zero, store there the bit offset (counted from the right) where
18361 the single stretch of 1 bits begins; and similarly for B, the bit
18362 offset where it ends. */
18365 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18367 unsigned HOST_WIDE_INT val = INTVAL (mask);
18368 unsigned HOST_WIDE_INT bit;
18370 int n = GET_MODE_PRECISION (mode);
/* Only the register-width modes have rotate-and-mask insns.  */
18372 if (mode != DImode && mode != SImode)
/* Three shapes are accepted: a non-negative value holding one
   contiguous run of 1s; all-ones; or a wrap-around mask (1s at both
   ends, 0s in the middle).  exact_log2 returns -1 when the operand
   is not a power of two, which rejects non-contiguous masks.
   NOTE(review): the lines computing BIT and the early returns are
   not visible in this excerpt — confirm against the full file.  */
18375 if (INTVAL (mask) >= 0)
18378 ne = exact_log2 (bit);
18379 nb = exact_log2 (val + bit);
18381 else if (val + 1 == 0)
18390 nb = exact_log2 (bit);
18391 ne = exact_log2 (val + bit);
18396 ne = exact_log2 (bit);
18397 if (val + bit == 0)
/* Run boundaries must both lie inside the mode's precision.  */
18405 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18416 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18417 or rldicr instruction, to implement an AND with it in mode MODE. */
18420 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18424 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18427 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
/* ne == 0 -> rldicl (clear left); nb == 63 -> rldicr (clear right);
   otherwise the run must fit in the low 32 bits for rlwinm.  */
18429 if (mode == DImode)
18430 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18432 /* For SImode, rlwinm can do everything. */
18433 if (mode == SImode)
18434 return (nb < 32 && ne < 32);
18439 /* Return the instruction template for an AND with mask in mode MODE, with
18440 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18443 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
/* Caller guarantees the mask is valid (see rs6000_is_valid_and_mask);
   anything else is a backend bug.  */
18447 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18448 gcc_unreachable ();
/* Run ends at bit 0: rldicl clears everything left of bit NB.
   Mask begin/end are converted to the big-endian bit numbering the
   assembler expects (63 - n).  */
18450 if (mode == DImode && ne == 0)
18452 operands[3] = GEN_INT (63 - nb);
18454 return "rldicl. %0,%1,0,%3";
18455 return "rldicl %0,%1,0,%3";
/* Run starts at bit 63: rldicr clears everything right of bit NE.  */
18458 if (mode == DImode && nb == 63)
18460 operands[3] = GEN_INT (63 - ne);
18462 return "rldicr. %0,%1,0,%3";
18463 return "rldicr %0,%1,0,%3";
/* Run confined to the low 32 bits: rlwinm with explicit MB/ME.  */
18466 if (nb < 32 && ne < 32)
18468 operands[3] = GEN_INT (31 - nb);
18469 operands[4] = GEN_INT (31 - ne);
18471 return "rlwinm. %0,%1,0,%3,%4";
18472 return "rlwinm %0,%1,0,%3,%4";
18475 gcc_unreachable ();
18478 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18479 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18480 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18483 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18487 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18490 int n = GET_MODE_PRECISION (mode);
/* sh is the shift amount when constant; a variable amount is handled
   below (only rotates allow it).  NOTE(review): the else branch
   assigning the variable-shift sentinel is not visible here.  */
18493 if (CONST_INT_P (XEXP (shift, 1)))
18495 sh = INTVAL (XEXP (shift, 1));
18496 if (sh < 0 || sh >= n)
18500 rtx_code code = GET_CODE (shift);
18502 /* Convert any shift by 0 to a rotate, to simplify below code. */
18506 /* Convert rotate to simple shift if we can, to make analysis simpler. */
/* A rotate whose mask discards the wrapped-around bits is equivalent
   to a plain left or right shift.  */
18507 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18509 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18515 /* DImode rotates need rld*. */
18516 if (mode == DImode && code == ROTATE)
18517 return (nb == 63 || ne == 0 || ne == sh)
18519 /* SImode rotates need rlw*. */
18520 if (mode == SImode && code == ROTATE)
18521 return (nb < 32 && ne < 32 && sh < 32);
18523 /* Wrap-around masks are only okay for rotates. */
18527 /* Variable shifts are only okay for rotates. */
18531 /* Don't allow ASHIFT if the mask is wrong for that. */
/* A left shift zeroes the low SH bits, so the mask's low boundary
   must not reach below bit SH.  */
18532 if (code == ASHIFT && ne < sh)
18535 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18536 if the mask is wrong for that. */
18537 if (nb < 32 && ne < 32 && sh < 32
18538 && !(code == LSHIFTRT && nb >= 32 - sh))
18541 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18542 if the mask is wrong for that. */
/* NOTE(review): a line adjusting SH for LSHIFTRT (sh = 64 - sh) is
   presumably between these two — not visible in this excerpt.  */
18543 if (code == LSHIFTRT)
18545 if (nb == 63 || ne == 0 || ne == sh)
18546 return !(code == LSHIFTRT && nb >= sh);
18551 /* Return the instruction template for a shift with mask in mode MODE, with
18552 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* NOTE(review): interior lines elided; each dotted/undotted template pair
   below is presumably guarded by an elided "if (dot)".  Side effects:
   rewrites operands[2] (shift count) for LSHIFTRT (a right shift by N is a
   rotate left by width-N), and operands[3]/[4] (mask bounds).  */
18555 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18559 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18560 gcc_unreachable ();
18562 if (mode == DImode && ne == 0)
18564 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18565 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18566 operands[3] = GEN_INT (63 - nb);
18568 return "rld%I2cl. %0,%1,%2,%3";
18569 return "rld%I2cl %0,%1,%2,%3";
18572 if (mode == DImode && nb == 63)
18574 operands[3] = GEN_INT (63 - ne);
18576 return "rld%I2cr. %0,%1,%2,%3";
18577 return "rld%I2cr %0,%1,%2,%3";
/* NOTE(review): the opening condition of this "if" (presumably
   mode == DImode plus more) is elided; only its trailing clauses show.  */
18581 && GET_CODE (operands[4]) != LSHIFTRT
18582 && CONST_INT_P (operands[2])
18583 && ne == INTVAL (operands[2]))
18585 operands[3] = GEN_INT (63 - nb);
18587 return "rld%I2c. %0,%1,%2,%3";
18588 return "rld%I2c %0,%1,%2,%3";
18591 if (nb < 32 && ne < 32)
18593 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18594 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18595 operands[3] = GEN_INT (31 - nb);
18596 operands[4] = GEN_INT (31 - ne);
18597 /* This insn can also be a 64-bit rotate with mask that really makes
18598 it just a shift right (with mask); the %h below are to adjust for
18599 that situation (shift count is >= 32 in that case). */
18601 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18602 return "rlw%I2nm %0,%1,%h2,%3,%4";
18605 gcc_unreachable ();
18608 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18609 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18610 ASHIFT, or LSHIFTRT) in mode MODE. */
/* NOTE(review): interior lines elided; mirrors the structure of
   rs6000_is_valid_shift_mask above but for the insert (rl*imi) forms.  */
18613 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18617 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18620 int n = GET_MODE_PRECISION (mode);
18622 int sh = INTVAL (XEXP (shift, 1));
18623 if (sh < 0 || sh >= n)
18626 rtx_code code = GET_CODE (shift);
18628 /* Convert any shift by 0 to a rotate, to simplify below code. */
18632 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18633 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18635 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18641 /* DImode rotates need rldimi. */
18642 if (mode == DImode && code == ROTATE)
18645 /* SImode rotates need rlwimi. */
18646 if (mode == SImode && code == ROTATE)
18647 return (nb < 32 && ne < 32 && sh < 32);
18649 /* Wrap-around masks are only okay for rotates. */
18653 /* Don't allow ASHIFT if the mask is wrong for that. */
18654 if (code == ASHIFT && ne < sh)
18657 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18658 if the mask is wrong for that. */
18659 if (nb < 32 && ne < 32 && sh < 32
18660 && !(code == LSHIFTRT && nb >= 32 - sh))
18663 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18664 if the mask is wrong for that. */
18665 if (code == LSHIFTRT)
18668 return !(code == LSHIFTRT && nb >= sh);
18673 /* Return the instruction template for an insert with mask in mode MODE, with
18674 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* NOTE(review): interior lines elided; each dotted/undotted template pair
   is presumably separated by an elided "if (dot)" guard.  Side effects:
   rewrites operands[2]..[4] to instruction fields.  */
18677 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18681 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18682 gcc_unreachable ();
18684 /* Prefer rldimi because rlwimi is cracked. */
18685 if (TARGET_POWERPC64
18686 && (!dot || mode == DImode)
18687 && GET_CODE (operands[4]) != LSHIFTRT
18688 && ne == INTVAL (operands[2]))
18690 operands[3] = GEN_INT (63 - nb);
18692 return "rldimi. %0,%1,%2,%3";
18693 return "rldimi %0,%1,%2,%3";
18696 if (nb < 32 && ne < 32)
/* A right shift by N is a rotate left by 32-N for rlwimi.  */
18698 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18699 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18700 operands[3] = GEN_INT (31 - nb);
18701 operands[4] = GEN_INT (31 - ne);
18703 return "rlwimi. %0,%1,%2,%3,%4";
18704 return "rlwimi %0,%1,%2,%3,%4";
18707 gcc_unreachable ();
18710 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18711 using two machine instructions. */
/* NOTE(review): some interior lines are elided (item 2 of the enumeration
   below, the early "return true").  */
18714 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18716 /* There are two kinds of AND we can handle with two insns:
18717 1) those we can do with two rl* insn;
18720 We do not handle that last case yet. */
18722 /* If there is just one stretch of ones, we can do it. */
18723 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18726 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18727 one insn, we can do the whole thing with two. */
/* bit1 = lowest set bit of val; bit2 = lowest clear bit above bit1
   (bottom of the lowest "hole"); val1 = val with the bits at and below
   that hole cleared; bit3 = lowest set bit above the hole.  So
   val + bit3 - bit2 is val with the lowest hole filled with ones.  */
18728 unsigned HOST_WIDE_INT val = INTVAL (c);
18729 unsigned HOST_WIDE_INT bit1 = val & -val;
18730 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18731 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18732 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18733 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18736 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18737 If EXPAND is true, split rotate-and-mask instructions we generate to
18738 their constituent parts as well (this is used during expand); if DOT
18739 is 1, make the last insn a record-form instruction clobbering the
18740 destination GPR and setting the CC reg (from operands[3]); if 2, set
18741 that GPR as well as the CC reg. */
/* NOTE(review): interior lines are elided throughout (braces, "if (expand)"
   / "else" selectors between the gen_* and gen_rtx_* variants, early
   returns, and the second argument of the gen_rtx_AND at 18811).  The
   visible pattern: each case emits either split insns (expand) or combined
   rotate-and-mask RTL finished by rs6000_emit_dot_insn.  */
18744 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18746 gcc_assert (!(expand && dot));
18748 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18750 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18751 shift right. This generates better code than doing the masks without
18752 shifts, or shifting first right and then left. */
18754 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18756 gcc_assert (mode == DImode);
18758 int shift = 63 - nb;
18761 rtx tmp1 = gen_reg_rtx (DImode);
18762 rtx tmp2 = gen_reg_rtx (DImode);
18763 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18764 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18765 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18769 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18770 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18771 emit_move_insn (operands[0], tmp);
18772 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18773 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18778 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18779 that does the rest. */
/* Same bit tricks as rs6000_is_valid_2insn_and: bit1 = lowest set bit,
   bit2 = bottom of lowest hole, bit3 = top of lowest hole.  */
18780 unsigned HOST_WIDE_INT bit1 = val & -val;
18781 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18782 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18783 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18785 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18786 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18788 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18790 /* Two "no-rotate"-and-mask instructions, for SImode. */
18791 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18793 gcc_assert (mode == SImode);
18795 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18796 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18797 emit_move_insn (reg, tmp);
18798 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18799 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18803 gcc_assert (mode == DImode);
18805 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18806 insns; we have to do the first in SImode, because it wraps. */
18807 if (mask2 <= 0xffffffff
18808 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18810 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18811 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18813 rtx reg_low = gen_lowpart (SImode, reg);
18814 emit_move_insn (reg_low, tmp);
18815 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18816 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18820 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18821 at the top end), rotate back and clear the other hole. */
18822 int right = exact_log2 (bit3);
18823 int left = 64 - right;
18825 /* Rotate the mask too. */
18826 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18830 rtx tmp1 = gen_reg_rtx (DImode);
18831 rtx tmp2 = gen_reg_rtx (DImode);
18832 rtx tmp3 = gen_reg_rtx (DImode);
18833 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18834 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18835 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18836 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18840 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18841 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18842 emit_move_insn (operands[0], tmp);
18843 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18844 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18845 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18849 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
18850 for lfq and stfq insns iff the registers are hard registers. */
/* NOTE(review): braces and the "return 0" lines of the guard clauses are
   elided from this excerpt.  */
18853 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18855 /* We might have been passed a SUBREG. */
18856 if (!REG_P (reg1) || !REG_P (reg2))
18859 /* We might have been passed non floating point registers. */
18860 if (!FP_REGNO_P (REGNO (reg1))
18861 || !FP_REGNO_P (REGNO (reg2)))
/* Candidates only when the two FPRs are consecutive (reg1 then reg2).  */
18864 return (REGNO (reg1) == REGNO (reg2) - 1);
18867 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18868 addr1 and addr2 must be in consecutive memory locations
18869 (addr2 == addr1 + 8). */
/* NOTE(review): braces, the "return 0" failure paths, the offset1/offset2
   zero-initializations and the reg1 == reg2 comparison are among the
   elided lines in this excerpt.  */
18872 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18875 unsigned int reg1, reg2;
18876 int offset1, offset2;
18878 /* The mems cannot be volatile. */
18879 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18882 addr1 = XEXP (mem1, 0);
18883 addr2 = XEXP (mem2, 0);
18885 /* Extract an offset (if used) from the first addr. */
18886 if (GET_CODE (addr1) == PLUS)
18888 /* If not a REG, return zero. */
18889 if (!REG_P (XEXP (addr1, 0)))
18893 reg1 = REGNO (XEXP (addr1, 0));
18894 /* The offset must be constant! */
18895 if (!CONST_INT_P (XEXP (addr1, 1)))
18897 offset1 = INTVAL (XEXP (addr1, 1));
18900 else if (!REG_P (addr1))
18904 reg1 = REGNO (addr1);
18905 /* This was a simple (mem (reg)) expression. Offset is 0. */
18909 /* And now for the second addr. */
18910 if (GET_CODE (addr2) == PLUS)
18912 /* If not a REG, return zero. */
18913 if (!REG_P (XEXP (addr2, 0)))
18917 reg2 = REGNO (XEXP (addr2, 0));
18918 /* The offset must be constant. */
18919 if (!CONST_INT_P (XEXP (addr2, 1)))
18921 offset2 = INTVAL (XEXP (addr2, 1));
18924 else if (!REG_P (addr2))
18928 reg2 = REGNO (addr2);
18929 /* This was a simple (mem (reg)) expression. Offset is 0. */
18933 /* Both of these must have the same base register. */
18937 /* The offset for the second addr must be 8 more than the first addr. */
18938 if (offset2 != offset1 + 8)
18941 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18946 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18947 need to use DDmode, in all other cases we can use the same mode. */
18948 static machine_mode
18949 rs6000_secondary_memory_needed_mode (machine_mode mode)
/* NOTE(review): the return statements (DDmode for the SDmode-under-LRA
   case, otherwise MODE) are elided from this excerpt.  */
18951 if (lra_in_progress && mode == SDmode)
18956 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18957 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18958 only work on the traditional altivec registers, note if an altivec register
/* NOTE(review): the tail of the comment above and several braces/early
   returns are elided.  REG may be a SUBREG; *IS_ALTIVEC (when non-NULL)
   is set to true for Altivec hard registers.  */
18961 static enum rs6000_reg_type
18962 register_to_reg_type (rtx reg, bool *is_altivec)
18964 HOST_WIDE_INT regno;
18965 enum reg_class rclass;
18967 if (SUBREG_P (reg))
18968 reg = SUBREG_REG (reg);
18971 return NO_REG_TYPE;
18973 regno = REGNO (reg);
18974 if (!HARD_REGISTER_NUM_P (regno))
/* Before register allocation completes, pseudos have no fixed class.  */
18976 if (!lra_in_progress && !reload_completed)
18977 return PSEUDO_REG_TYPE;
/* During/after RA, try to resolve the pseudo to its hard register.  */
18979 regno = true_regnum (reg);
18980 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18981 return PSEUDO_REG_TYPE;
18984 gcc_assert (regno >= 0);
18986 if (is_altivec && ALTIVEC_REGNO_P (regno))
18987 *is_altivec = true;
18989 rclass = rs6000_regno_regclass[regno];
18990 return reg_class_to_reg_type[(int)rclass];
18993 /* Helper function to return the cost of adding a TOC entry address. */
/* NOTE(review): the return type line, braces, the "ret" declaration, the
   "else" keyword before 19004 and the final "return ret;" are elided.
   Cost is cheaper for medium/large code models (addis/addi sequence) than
   for -mcmodel=small, where -mminimal-toc is the most expensive case.  */
18996 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19000 if (TARGET_CMODEL != CMODEL_SMALL)
19001 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19004 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19009 /* Helper function for rs6000_secondary_reload to determine whether the memory
19010 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19011 needs reloading. Return negative if the memory is not handled by the memory
19012 helper functions and to try a different reload method, 0 if no additional
19013 instructions are need, and positive to give the extra cost for the
/* NOTE(review): this large function has many interior lines elided from
   the excerpt: case labels of the switch (PRE_INC/PRE_DEC, PRE_MODIFY,
   AND, REG/SUBREG, PLUS, LO_SUM, SYMBOL_REF/CONST/LABEL_REF, UNSPEC and
   default), "extra_cost = 1;"/"type = ..." assignments, break statements,
   the fprintf openers in the debug block, and the final return.  Read
   alongside the full rs6000.c.  */
19017 rs6000_secondary_reload_memory (rtx addr,
19018 enum reg_class rclass,
19021 int extra_cost = 0;
19022 rtx reg, and_arg, plus_arg0, plus_arg1;
19023 addr_mask_type addr_mask;
/* type: human-readable reason a reload is needed (debug output only);
   fail_msg: reason the address cannot be handled here (negative return).  */
19024 const char *type = NULL;
19025 const char *fail_msg = NULL;
/* Select the address-capability mask for the reload register class.  */
19027 if (GPR_REG_CLASS_P (rclass))
19028 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19030 else if (rclass == FLOAT_REGS)
19031 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19033 else if (rclass == ALTIVEC_REGS)
19034 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19036 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19037 else if (rclass == VSX_REGS)
19038 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19039 & ~RELOAD_REG_AND_M16);
19041 /* If the register allocator hasn't made up its mind yet on the register
19042 class to use, settle on defaults to use. */
19043 else if (rclass == NO_REGS)
19045 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19046 & ~RELOAD_REG_AND_M16);
/* If different register banks disagree on the supported forms, drop the
   forms that are not universally available.  */
19048 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19049 addr_mask &= ~(RELOAD_REG_INDEXED
19050 | RELOAD_REG_PRE_INCDEC
19051 | RELOAD_REG_PRE_MODIFY);
19057 /* If the register isn't valid in this register class, just return now. */
19058 if ((addr_mask & RELOAD_REG_VALID) == 0)
19060 if (TARGET_DEBUG_ADDR)
19063 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19064 "not valid in class\n",
19065 GET_MODE_NAME (mode), reg_class_names[rclass]);
19072 switch (GET_CODE (addr))
19074 /* Does the register class supports auto update forms for this mode? We
19075 don't need a scratch register, since the powerpc only supports
19076 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19079 reg = XEXP (addr, 0);
19080 if (!base_reg_operand (addr, GET_MODE (reg)))
19082 fail_msg = "no base register #1";
19086 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19094 reg = XEXP (addr, 0);
19095 plus_arg1 = XEXP (addr, 1);
19096 if (!base_reg_operand (reg, GET_MODE (reg))
19097 || GET_CODE (plus_arg1) != PLUS
19098 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19100 fail_msg = "bad PRE_MODIFY";
19104 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19111 /* Do we need to simulate AND -16 to clear the bottom address bits used
19112 in VMX load/stores? Only allow the AND for vector sizes. */
19114 and_arg = XEXP (addr, 0);
19115 if (GET_MODE_SIZE (mode) != 16
19116 || !CONST_INT_P (XEXP (addr, 1))
19117 || INTVAL (XEXP (addr, 1)) != -16)
19119 fail_msg = "bad Altivec AND #1";
19123 if (rclass != ALTIVEC_REGS)
19125 if (legitimate_indirect_address_p (and_arg, false))
19128 else if (legitimate_indexed_address_p (and_arg, false))
19133 fail_msg = "bad Altivec AND #2";
19141 /* If this is an indirect address, make sure it is a base register. */
19144 if (!legitimate_indirect_address_p (addr, false))
19151 /* If this is an indexed address, make sure the register class can handle
19152 indexed addresses for this mode. */
19154 plus_arg0 = XEXP (addr, 0);
19155 plus_arg1 = XEXP (addr, 1);
19157 /* (plus (plus (reg) (constant)) (constant)) is generated during
19158 push_reload processing, so handle it now. */
19159 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19161 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19168 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19169 push_reload processing, so handle it now. */
19170 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19172 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19175 type = "indexed #2";
19179 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19181 fail_msg = "no base register #2";
19185 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19187 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19188 || !legitimate_indexed_address_p (addr, false))
/* ISA 3.0 vector d-form: offset must be a multiple of 16.  */
19195 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19196 && CONST_INT_P (plus_arg1))
19198 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19201 type = "vector d-form offset";
19205 /* Make sure the register class can handle offset addresses. */
19206 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19208 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19211 type = "offset #2";
19217 fail_msg = "bad PLUS";
19224 /* Quad offsets are restricted and can't handle normal addresses. */
19225 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19228 type = "vector d-form lo_sum";
19231 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19233 fail_msg = "bad LO_SUM";
19237 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19244 /* Static addresses need to create a TOC entry. */
19248 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19251 type = "vector d-form lo_sum #2";
19257 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19261 /* TOC references look like offsetable memory. */
19263 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19265 fail_msg = "bad UNSPEC";
19269 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19272 type = "vector d-form lo_sum #3";
19275 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19278 type = "toc reference";
19284 fail_msg = "bad address";
/* Debug dump: negative extra_cost means "not handled here".  */
19289 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19291 if (extra_cost < 0)
19293 "rs6000_secondary_reload_memory error: mode = %s, "
19294 "class = %s, addr_mask = '%s', %s\n",
19295 GET_MODE_NAME (mode),
19296 reg_class_names[rclass],
19297 rs6000_debug_addr_mask (addr_mask, false),
19298 (fail_msg != NULL) ? fail_msg : "<bad address>");
19302 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19303 "addr_mask = '%s', extra cost = %d, %s\n",
19304 GET_MODE_NAME (mode),
19305 reg_class_names[rclass],
19306 rs6000_debug_addr_mask (addr_mask, false),
19308 (type) ? type : "<none>");
19316 /* Helper function for rs6000_secondary_reload to return true if a move to a
19317 different register class is really a simple move. */
/* NOTE(review): interior lines elided (third parameter "machine_mode mode",
   braces, "return true;" bodies, final "return false;").  */
19320 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19321 enum rs6000_reg_type from_type,
19324 int size = GET_MODE_SIZE (mode);
19326 /* Add support for various direct moves available. In this function, we only
19327 look at cases where we don't need any extra registers, and one or more
19328 simple move insns are issued. Originally small integers are not allowed
19329 in FPR/VSX registers. Single precision binary floating is not a simple
19330 move because we need to convert to the single precision memory layout.
19331 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19332 need special direct move handling, which we do not support yet. */
19333 if (TARGET_DIRECT_MOVE
19334 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19335 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19337 if (TARGET_POWERPC64)
19339 /* ISA 2.07: MTVSRD or MVFVSRD. */
19343 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19344 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19348 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19349 if (TARGET_P8_VECTOR)
19351 if (mode == SImode)
19354 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19358 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19359 if (mode == SDmode)
19363 /* Power6+: MFTGPR or MFFGPR. */
19364 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19365 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19366 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19369 /* Move to/from SPR. */
19370 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19371 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19372 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19378 /* Direct move helper function for rs6000_secondary_reload, handle all of the
19379 special direct moves that involve allocating an extra register, return the
19380 insn code of the helper function if there is such a function or
19381 CODE_FOR_nothing if not. */
/* NOTE(review): elided lines include the remaining parameters ("machine_mode
   mode" and "bool altivec_p"), the "cost" declaration, braces, and the
   return-value handling after 19453.  On success the function fills in
   SRI->icode / SRI->extra_cost from the reg_addr reload helpers.  */
19384 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19385 enum rs6000_reg_type from_type,
19387 secondary_reload_info *sri,
19391 enum insn_code icode = CODE_FOR_nothing;
19393 int size = GET_MODE_SIZE (mode);
19395 if (TARGET_POWERPC64 && size == 16)
19397 /* Handle moving 128-bit values from GPRs to VSX point registers on
19398 ISA 2.07 (power8, power9) when running in 64-bit mode using
19399 XXPERMDI to glue the two 64-bit values back together. */
19400 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19402 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19403 icode = reg_addr[mode].reload_vsx_gpr;
19406 /* Handle moving 128-bit values from VSX point registers to GPRs on
19407 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19408 bottom 64-bit value. */
19409 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19411 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19412 icode = reg_addr[mode].reload_gpr_vsx;
19416 else if (TARGET_POWERPC64 && mode == SFmode)
19418 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19420 cost = 3; /* xscvdpspn, mfvsrd, and. */
19421 icode = reg_addr[mode].reload_gpr_vsx;
19424 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19426 cost = 2; /* mtvsrz, xscvspdpn. */
19427 icode = reg_addr[mode].reload_vsx_gpr;
19431 else if (!TARGET_POWERPC64 && size == 8)
19433 /* Handle moving 64-bit values from GPRs to floating point registers on
19434 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19435 32-bit values back together. Altivec register classes must be handled
19436 specially since a different instruction is used, and the secondary
19437 reload support requires a single instruction class in the scratch
19438 register constraint. However, right now TFmode is not allowed in
19439 Altivec registers, so the pattern will never match. */
19440 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19442 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19443 icode = reg_addr[mode].reload_fpr_gpr;
19447 if (icode != CODE_FOR_nothing)
19452 sri->icode = icode;
19453 sri->extra_cost = cost;
19460 /* Return whether a move between two register classes can be done either
19461 directly (simple move) or via a pattern that uses a single extra temporary
19462 (using ISA 2.07's direct move in this case). */
/* NOTE(review): elided lines include the remaining parameters ("machine_mode
   mode" and "bool altivec_p"), braces, the "return true/false" bodies, and
   the trailing "altivec_p);" argument of the call at 19499.  */
19465 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19466 enum rs6000_reg_type from_type,
19468 secondary_reload_info *sri,
19471 /* Fall back to load/store reloads if either type is not a register. */
19472 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19475 /* If we haven't allocated registers yet, assume the move can be done for the
19476 standard register types. */
19477 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19478 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19479 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19482 /* Moves to the same set of registers is a simple move for non-specialized
19484 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19487 /* Check whether a simple move can be done directly. */
19488 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
/* Simple move: no helper insn and no extra cost needed.  */
19492 sri->icode = CODE_FOR_nothing;
19493 sri->extra_cost = 0;
19498 /* Now check if we can do it in a few steps. */
19499 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19503 /* Inform reload about cases where moving X with a mode MODE to a register in
19504 RCLASS requires an extra scratch or immediate register. Return the class
19505 needed for the immediate register.
19507 For VSX and Altivec, we may need a register to convert sp+offset into
19510 For misaligned 64-bit gpr loads and stores we need a register to
19511 convert an offset address to indirect. */
/* NOTE(review): this is the TARGET_SECONDARY_RELOAD hook.  Many interior
   lines are elided from this excerpt: the "rtx x" and "machine_mode mode"
   parameters, the icode initializer at 19533-35 ("icode = (in_p ? ..."),
   braces, done_p/default_p assignments, MEM_P guards on the unaligned
   cases, the fprintf openers in the debug block, and "return ret;".  */
19514 rs6000_secondary_reload (bool in_p,
19516 reg_class_t rclass_i,
19518 secondary_reload_info *sri)
19520 enum reg_class rclass = (enum reg_class) rclass_i;
19521 reg_class_t ret = ALL_REGS;
19522 enum insn_code icode;
19523 bool default_p = false;
19524 bool done_p = false;
19526 /* Allow subreg of memory before/during reload. */
19527 bool memory_p = (MEM_P (x)
19528 || (!reload_completed && SUBREG_P (x)
19529 && MEM_P (SUBREG_REG (x))));
19531 sri->icode = CODE_FOR_nothing;
19532 sri->t_icode = CODE_FOR_nothing;
19533 sri->extra_cost = 0;
/* Pick the load or store reload helper for this mode.  */
19535 ? reg_addr[mode].reload_load
19536 : reg_addr[mode].reload_store);
19538 if (REG_P (x) || register_operand (x, mode))
19540 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19541 bool altivec_p = (rclass == ALTIVEC_REGS);
19542 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
/* For input reloads the data flows from X into RCLASS; swap so the
   to/from naming matches the move direction (guard elided).  */
19545 std::swap (to_type, from_type);
19547 /* Can we do a direct move of some sort? */
19548 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19551 icode = (enum insn_code)sri->icode;
19558 /* Make sure 0.0 is not reloaded or forced into memory. */
19559 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19566 /* If this is a scalar floating point value and we want to load it into the
19567 traditional Altivec registers, do it via a move via a traditional floating
19568 point register, unless we have D-form addressing. Also make sure that
19569 non-zero constants use a FPR. */
19570 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19571 && !mode_supports_vmx_dform (mode)
19572 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19573 && (memory_p || CONST_DOUBLE_P (x)))
19580 /* Handle reload of load/stores if we have reload helper functions. */
19581 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19583 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
/* Negative extra_cost means "not handled"; fall through to default.  */
19586 if (extra_cost >= 0)
19590 if (extra_cost > 0)
19592 sri->extra_cost = extra_cost;
19593 sri->icode = icode;
19598 /* Handle unaligned loads and stores of integer registers. */
19599 if (!done_p && TARGET_POWERPC64
19600 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19602 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19604 rtx addr = XEXP (x, 0);
19605 rtx off = address_offset (addr);
19607 if (off != NULL_RTX)
19609 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19610 unsigned HOST_WIDE_INT offset = INTVAL (off);
19612 /* We need a secondary reload when our legitimate_address_p
19613 says the address is good (as otherwise the entire address
19614 will be reloaded), and the offset is not a multiple of
19615 four or we have an address wrap. Address wrap will only
19616 occur for LO_SUMs since legitimate_offset_address_p
19617 rejects addresses for 16-byte mems that will wrap. */
19618 if (GET_CODE (addr) == LO_SUM
19619 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19620 && ((offset & 3) != 0
19621 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19622 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19623 && (offset & 3) != 0))
19625 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19627 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19628 : CODE_FOR_reload_di_load);
19630 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19631 : CODE_FOR_reload_di_store);
19632 sri->extra_cost = 2;
/* 32-bit target: multi-word GPR accesses can wrap past the offset range.  */
19643 if (!done_p && !TARGET_POWERPC64
19644 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19646 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19648 rtx addr = XEXP (x, 0);
19649 rtx off = address_offset (addr);
19651 if (off != NULL_RTX)
19653 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19654 unsigned HOST_WIDE_INT offset = INTVAL (off);
19656 /* We need a secondary reload when our legitimate_address_p
19657 says the address is good (as otherwise the entire address
19658 will be reloaded), and we have a wrap.
19660 legitimate_lo_sum_address_p allows LO_SUM addresses to
19661 have any offset so test for wrap in the low 16 bits.
19663 legitimate_offset_address_p checks for the range
19664 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19665 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19666 [0x7ff4,0x7fff] respectively, so test for the
19667 intersection of these ranges, [0x7ffc,0x7fff] and
19668 [0x7ff4,0x7ff7] respectively.
19670 Note that the address we see here may have been
19671 manipulated by legitimize_reload_address. */
19672 if (GET_CODE (addr) == LO_SUM
19673 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19674 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19677 sri->icode = CODE_FOR_reload_si_load;
19679 sri->icode = CODE_FOR_reload_si_store;
19680 sri->extra_cost = 2;
/* Nothing above handled it: defer to the generic implementation.  */
19695 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19697 gcc_assert (ret != ALL_REGS);
19699 if (TARGET_DEBUG_ADDR)
19702 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19704 reg_class_names[ret],
19705 in_p ? "true" : "false",
19706 reg_class_names[rclass],
19707 GET_MODE_NAME (mode));
19709 if (reload_completed)
19710 fputs (", after reload", stderr);
19713 fputs (", done_p not set", stderr);
19716 fputs (", default secondary reload", stderr);
19718 if (sri->icode != CODE_FOR_nothing)
19719 fprintf (stderr, ", reload func = %s, extra cost = %d",
19720 insn_data[sri->icode].name, sri->extra_cost);
19722 else if (sri->extra_cost > 0)
19723 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19725 fputs ("\n", stderr);
19732 /* Better tracing for rs6000_secondary_reload_inner. */
/* NOTE(review): elided lines include the "bool store_p" parameter, the
   set/clobber declarations, braces, and the "if (store_p)"/"else" guard
   between 19746 and 19748.  Dumps the reload as a PARALLEL of the
   load/store SET plus the scratch-register CLOBBER.  */
19735 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19740 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19742 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19743 store_p ? "store" : "load");
19746 set = gen_rtx_SET (mem, reg);
19748 set = gen_rtx_SET (reg, mem);
19750 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19751 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
/* Abort helper: trace the failing reload (LINE identifies the caller),
   then gcc_unreachable; declared noreturn.  NOTE(review): the "bool
   store_p" parameter line and braces are elided from this excerpt.  */
19754 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19755 ATTRIBUTE_NORETURN;
19758 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19761 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19762 gcc_unreachable ();
19765 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19766 reload helper functions. These were identified in
19767 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19768 reload, it calls the insns:
19769 reload_<RELOAD:mode>_<P:mptrsize>_store
19770 reload_<RELOAD:mode>_<P:mptrsize>_load
19772 which in turn calls this function, to do whatever is necessary to create
19773 valid addresses. */
/* NOTE(review): this listing is missing lines (the embedded numbering
   has gaps), so case labels/braces of the switch below are not visible;
   comments on individual arms are hedged accordingly.  */
19776 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19778 int regno = true_regnum (reg);
19779 machine_mode mode = GET_MODE (reg);
19780 addr_mask_type addr_mask;
19783 rtx op_reg, op0, op1;
/* Validate the inputs: REG must resolve to a hard register, MEM must be
   a memory reference, and SCRATCH must be usable as a base register.  */
19788 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
19789 || !base_reg_operand (scratch, GET_MODE (scratch)))
19790 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Pick the addressing-capability mask for the register bank (GPR, FPR,
   or Altivec) that REG lives in.  */
19792 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19793 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19795 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19796 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19798 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19799 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19802 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19804 /* Make sure the mode is valid in this register class. */
19805 if ((addr_mask & RELOAD_REG_VALID) == 0)
19806 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19808 if (TARGET_DEBUG_ADDR)
19809 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
/* NEW_ADDR tracks the (possibly rewritten) address; if it ends up
   different from ADDR, MEM is re-built below.  */
19811 new_addr = addr = XEXP (mem, 0);
19812 switch (GET_CODE (addr))
19814 /* Does the register class support auto update forms for this mode? If
19815 not, do the update now. We don't need a scratch register, since the
19816 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19819 op_reg = XEXP (addr, 0);
19820 if (!base_reg_operand (op_reg, Pmode))
19821 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19823 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19825 int delta = GET_MODE_SIZE (mode);
/* Materialize the pre-inc/dec side effect as an explicit add;
   presumably DELTA is negated for PRE_DEC on a missing line.  */
19826 if (GET_CODE (addr) == PRE_DEC)
19828 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
/* PRE_MODIFY-style arm (inferred from the shape checks below).  */
19834 op0 = XEXP (addr, 0);
19835 op1 = XEXP (addr, 1);
19836 if (!base_reg_operand (op0, Pmode)
19837 || GET_CODE (op1) != PLUS
19838 || !rtx_equal_p (op0, XEXP (op1, 0)))
19839 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19841 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19843 emit_insn (gen_rtx_SET (op0, op1));
19848 /* Do we need to simulate AND -16 to clear the bottom address bits used
19849 in VMX load/stores? */
19851 op0 = XEXP (addr, 0);
19852 op1 = XEXP (addr, 1);
19853 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19855 if (REG_P (op0) || SUBREG_P (op0))
19858 else if (GET_CODE (op1) == PLUS)
19860 emit_insn (gen_rtx_SET (scratch, op1))
19865 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Emit the AND into SCRATCH; CR0 is clobbered by the record form,
   hence the explicit CC scratch clobber in the PARALLEL.  */
19867 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19868 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19869 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19870 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19871 new_addr = scratch;
19875 /* If this is an indirect address, make sure it is a base register. */
19878 if (!base_reg_operand (addr, GET_MODE (addr)))
19880 emit_insn (gen_rtx_SET (scratch, addr));
19881 new_addr = scratch;
19885 /* If this is an indexed address, make sure the register class can handle
19886 indexed addresses for this mode. */
19888 op0 = XEXP (addr, 0);
19889 op1 = XEXP (addr, 1);
19890 if (!base_reg_operand (op0, Pmode))
19891 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19893 else if (int_reg_operand (op1, Pmode))
19895 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19897 emit_insn (gen_rtx_SET (scratch, addr));
19898 new_addr = scratch;
/* reg + const with quad (DQ-form) restrictions: force the address into
   SCRATCH unless it is already a valid quad address.  */
19902 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19904 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19905 || !quad_address_p (addr, mode, false))
19907 emit_insn (gen_rtx_SET (scratch, addr));
19908 new_addr = scratch;
19912 /* Make sure the register class can handle offset addresses. */
19913 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19915 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19917 emit_insn (gen_rtx_SET (scratch, addr));
19918 new_addr = scratch;
19923 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Presumably the LO_SUM arm (its case label is not visible here).  */
19928 op0 = XEXP (addr, 0);
19929 op1 = XEXP (addr, 1);
19930 if (!base_reg_operand (op0, Pmode))
19931 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19933 else if (int_reg_operand (op1, Pmode))
19935 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19937 emit_insn (gen_rtx_SET (scratch, addr));
19938 new_addr = scratch;
19942 /* Quad offsets are restricted and can't handle normal addresses. */
19943 else if (mode_supports_dq_form (mode))
19945 emit_insn (gen_rtx_SET (scratch, addr));
19946 new_addr = scratch;
19949 /* Make sure the register class can handle offset addresses. */
19950 else if (legitimate_lo_sum_address_p (mode, addr, false))
19952 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19954 emit_insn (gen_rtx_SET (scratch, addr));
19955 new_addr = scratch;
19960 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Symbolic/constant addresses: copy the whole address to SCRATCH.  */
19967 rs6000_emit_move (scratch, addr, Pmode);
19968 new_addr = scratch;
19972 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19975 /* Adjust the address if it changed. */
19976 if (addr != new_addr)
19978 mem = replace_equiv_address_nv (mem, new_addr);
19979 if (TARGET_DEBUG_ADDR)
19980 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19983 /* Now create the move. */
19985 emit_insn (gen_rtx_SET (mem, reg));
19987 emit_insn (gen_rtx_SET (reg, mem));
19992 /* Convert reloads involving 64-bit gprs and misaligned offset
19993 addressing, or multiple 32-bit gprs and offsets that are too large,
19994 to use indirect addressing. */
/* REG is the GPR being reloaded, MEM the memory operand, SCRATCH a free
   base register, STORE_P the direction.  The PLUS/LO_SUM address is
   moved into SCRATCH (or into the PRE_MODIFY base register) and MEM is
   rewritten to be indirect through it.  */
19997 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19999 int regno = true_regnum (reg);
20000 enum reg_class rclass;
20002 rtx scratch_or_premodify = scratch;
20004 if (TARGET_DEBUG_ADDR)
20006 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20007 store_p ? "store" : "load");
20008 fprintf (stderr, "reg:\n");
20010 fprintf (stderr, "mem:\n");
20012 fprintf (stderr, "scratch:\n");
20013 debug_rtx (scratch);
20016 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
20017 gcc_assert (MEM_P (mem));
20018 rclass = REGNO_REG_CLASS (regno);
20019 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20020 addr = XEXP (mem, 0);
/* For PRE_MODIFY, reuse the update base register as the destination of
   the address computation instead of SCRATCH, preserving the update.  */
20022 if (GET_CODE (addr) == PRE_MODIFY)
20024 gcc_assert (REG_P (XEXP (addr, 0))
20025 && GET_CODE (XEXP (addr, 1)) == PLUS
20026 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20027 scratch_or_premodify = XEXP (addr, 0);
20028 addr = XEXP (addr, 1);
20030 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20032 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20034 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20036 /* Now create the move. */
20038 emit_insn (gen_rtx_SET (mem, reg));
20040 emit_insn (gen_rtx_SET (reg, mem));
20045 /* Given an rtx X being reloaded into a reg required to be
20046 in class CLASS, return the class of reg to actually use.
20047 In general this is just CLASS; but on some machines
20048 in some cases it is preferable to use a more restrictive class.
20050 On the RS/6000, we have to return NO_REGS when we want to reload a
20051 floating-point CONST_DOUBLE to force it to be copied to memory.
20053 We also don't want to reload integer values into floating-point
20054 registers if we can at all help it. In fact, this can
20055 cause reload to die, if it tries to generate a reload of CTR
20056 into a FP register and discovers it doesn't have the memory location
20059 ??? Would it be a good idea to have reload do the converse, that is
20060 try to reload floating modes into FP registers if possible?
20063 static enum reg_class
20064 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20066 machine_mode mode = GET_MODE (x);
20067 bool is_constant = CONSTANT_P (x);
20069 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20070 reload class for it. */
20071 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20072 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20075 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20076 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20079 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20080 the reloading of address expressions using PLUS into floating point
20082 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20086 /* Zero is always allowed in all VSX registers. */
20087 if (x == CONST0_RTX (mode))
20090 /* If this is a vector constant that can be formed with a few Altivec
20091 instructions, we want altivec registers. */
20092 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20093 return ALTIVEC_REGS;
20095 /* If this is an integer constant that can easily be loaded into
20096 vector registers, allow it. */
20097 if (CONST_INT_P (x))
20099 HOST_WIDE_INT value = INTVAL (x);
20101 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20102 2.06 can generate it in the Altivec registers with
20106 if (TARGET_P8_VECTOR)
20108 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20109 return ALTIVEC_REGS;
20114 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20115 a sign extend in the Altivec registers. */
20116 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
20117 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
20118 return ALTIVEC_REGS;
20121 /* Force constant to memory. */
20125 /* D-form addressing can easily reload the value. */
20126 if (mode_supports_vmx_dform (mode)
20127 || mode_supports_dq_form (mode))
20130 /* If this is a scalar floating point value and we don't have D-form
20131 addressing, prefer the traditional floating point registers so that we
20132 can use D-form (register+offset) addressing. */
20133 if (rclass == VSX_REGS
20134 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
20137 /* Prefer the Altivec registers if Altivec is handling the vector
20138 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20140 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20141 || mode == V1TImode)
20142 return ALTIVEC_REGS;
/* Constants and addresses (PLUS) reload best through the integer
   register classes when RCLASS allows them.  */
20147 if (is_constant || GET_CODE (x) == PLUS)
20149 if (reg_class_subset_p (GENERAL_REGS, rclass))
20150 return GENERAL_REGS;
20151 if (reg_class_subset_p (BASE_REGS, rclass))
20156 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20157 return GENERAL_REGS;
20162 /* Debug version of rs6000_preferred_reload_class. */
/* Wrapper that calls the real hook and prints the decision to stderr;
   the fprintf call line itself is not visible in this listing.  */
20163 static enum reg_class
20164 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20166 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20169 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20171 reg_class_names[ret], reg_class_names[rclass],
20172 GET_MODE_NAME (GET_MODE (x)));
20178 /* If we are copying between FP or AltiVec registers and anything else, we need
20179 a memory location. The exception is when we are targeting ppc64 and the
20180 move to/from fpr to gpr instructions are available. Also, under VSX, you
20181 can copy vector registers from the FP register set to the Altivec register
20182 set and vice versa. */
/* Implements TARGET_SECONDARY_MEMORY_NEEDED: returns whether a
   FROM_CLASS -> TO_CLASS copy of MODE must go through memory.  */
20185 rs6000_secondary_memory_needed (machine_mode mode,
20186 reg_class_t from_class,
20187 reg_class_t to_class)
20189 enum rs6000_reg_type from_type, to_type;
20190 bool altivec_p = ((from_class == ALTIVEC_REGS)
20191 || (to_class == ALTIVEC_REGS));
20193 /* If a simple/direct move is available, we don't need secondary memory */
20194 from_type = reg_class_to_reg_type[(int)from_class];
20195 to_type = reg_class_to_reg_type[(int)to_class];
20197 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20198 (secondary_reload_info *)0, altivec_p))
20201 /* If we have a floating point or vector register class, we need to use
20202 memory to transfer the data. */
20203 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20209 /* Debug version of rs6000_secondary_memory_needed. */
/* Same contract as rs6000_secondary_memory_needed, plus a stderr trace
   of the inputs and the result.  */
20211 rs6000_debug_secondary_memory_needed (machine_mode mode,
20212 reg_class_t from_class,
20213 reg_class_t to_class)
20215 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
20218 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20219 "to_class = %s, mode = %s\n",
20220 ret ? "true" : "false",
20221 reg_class_names[from_class],
20222 reg_class_names[to_class],
20223 GET_MODE_NAME (mode));
20228 /* Return the register class of a scratch register needed to copy IN into
20229 or out of a register in RCLASS in MODE. If it can be done directly,
20230 NO_REGS is returned. */
20232 static enum reg_class
20233 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20238 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20240 && MACHOPIC_INDIRECT
20244 /* We cannot copy a symbolic operand directly into anything
20245 other than BASE_REGS for TARGET_ELF. So indicate that a
20246 register from BASE_REGS is needed as an intermediate
20249 On Darwin, pic addresses require a load from memory, which
20250 needs a base register. */
20251 if (rclass != BASE_REGS
20252 && (SYMBOL_REF_P (in)
20253 || GET_CODE (in) == HIGH
20254 || GET_CODE (in) == LABEL_REF
20255 || GET_CODE (in) == CONST))
/* Resolve IN to a hard register number, if any; REGNO stays -1 (set on
   a line not visible here, presumably) for non-register operands.  */
20261 regno = REGNO (in);
20262 if (!HARD_REGISTER_NUM_P (regno))
20264 regno = true_regnum (in);
20265 if (!HARD_REGISTER_NUM_P (regno))
20269 else if (SUBREG_P (in))
20271 regno = true_regnum (in);
20272 if (!HARD_REGISTER_NUM_P (regno))
20278 /* If we have VSX register moves, prefer moving scalar values between
20279 Altivec registers and GPR by going via an FPR (and then via memory)
20280 instead of reloading the secondary memory address for Altivec moves. */
20282 && GET_MODE_SIZE (mode) < 16
20283 && !mode_supports_vmx_dform (mode)
20284 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20285 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20286 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20287 && (regno >= 0 && INT_REGNO_P (regno)))))
20290 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20292 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20293 || (regno >= 0 && INT_REGNO_P (regno)))
20296 /* Constants, memory, and VSX registers can go into VSX registers (both the
20297 traditional floating point and the altivec registers). */
20298 if (rclass == VSX_REGS
20299 && (regno == -1 || VSX_REGNO_P (regno)))
20302 /* Constants, memory, and FP registers can go into FP registers. */
20303 if ((regno == -1 || FP_REGNO_P (regno))
20304 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
/* SDmode reloads need a GPR intermediary under old reload; LRA handles
   it directly.  */
20305 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20307 /* Memory, and AltiVec registers can go into AltiVec registers. */
20308 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20309 && rclass == ALTIVEC_REGS)
20312 /* We can copy among the CR registers. */
20313 if ((rclass == CR_REGS || rclass == CR0_REGS)
20314 && regno >= 0 && CR_REGNO_P (regno))
20317 /* Otherwise, we need GENERAL_REGS. */
20318 return GENERAL_REGS;
20321 /* Debug version of rs6000_secondary_reload_class. */
/* Calls the real hook and traces the answer plus the input rtx.  */
20322 static enum reg_class
20323 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20324 machine_mode mode, rtx in)
20326 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20328 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20329 "mode = %s, input rtx:\n",
20330 reg_class_names[ret], reg_class_names[rclass],
20331 GET_MODE_NAME (mode));
20337 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
/* Return whether a subreg changing FROM mode to TO mode is allowed for
   registers in RCLASS.  The restrictions below mostly guard the VSX
   registers, where scalars live in the upper 64 bits.  */
20340 rs6000_can_change_mode_class (machine_mode from,
20342 reg_class_t rclass)
20344 unsigned from_size = GET_MODE_SIZE (from);
20345 unsigned to_size = GET_MODE_SIZE (to);
20347 if (from_size != to_size)
20349 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20351 if (reg_classes_intersect_p (xclass, rclass))
20353 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20354 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20355 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20356 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20358 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20359 single register under VSX because the scalar part of the register
20360 is in the upper 64-bits, and not the lower 64-bits. Types like
20361 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20362 IEEE floating point can't overlap, and neither can small
20365 if (to_float128_vector_p && from_float128_vector_p)
20368 else if (to_float128_vector_p || from_float128_vector_p)
20371 /* TDmode in floating-mode registers must always go into a register
20372 pair with the most significant word in the even-numbered register
20373 to match ISA requirements. In little-endian mode, this does not
20374 match subreg numbering, so we cannot allow subregs. */
20375 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20378 if (from_size < 8 || to_size < 8)
20381 if (from_size == 8 && (8 * to_nregs) != to_size)
20384 if (to_size == 8 && (8 * from_nregs) != from_size)
20393 /* Since the VSX register set includes traditional floating point registers
20394 and altivec registers, just check for the size being different instead of
20395 trying to check whether the modes are vector modes. Otherwise it won't
20396 allow say DF and DI to change classes. For types like TFmode and TDmode
20397 that take 2 64-bit registers, rather than a single 128-bit register, don't
20398 allow subregs of those types to other 128 bit types. */
20399 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20401 unsigned num_regs = (from_size + 15) / 16;
20402 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20403 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20406 return (from_size == 8 || from_size == 16);
20409 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20410 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20416 /* Debug version of rs6000_can_change_mode_class. */
/* Calls the real hook and traces FROM/TO/RCLASS and the result.  */
20418 rs6000_debug_can_change_mode_class (machine_mode from,
20420 reg_class_t rclass)
20422 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20425 "rs6000_can_change_mode_class, return %s, from = %s, "
20426 "to = %s, rclass = %s\n",
20427 ret ? "true" : "false",
20428 GET_MODE_NAME (from), GET_MODE_NAME (to),
20429 reg_class_names[rclass]);
20434 /* Return a string to do a move operation of 128 bits of data. */
/* OPERANDS[0] is the destination, OPERANDS[1] the source.  Classifies
   each side (GPR/FPR/Altivec/VSX register, memory, or constant) and
   returns the matching asm template; aborts via fatal_insn on an
   unhandled combination.  */
20437 rs6000_output_move_128bit (rtx operands[])
20439 rtx dest = operands[0];
20440 rtx src = operands[1];
20441 machine_mode mode = GET_MODE (dest);
20444 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20445 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
/* Classify the destination; the REG_P test guarding this (and the
   else-branch setting dest_regno = -1) is on lines missing here.  */
20449 dest_regno = REGNO (dest);
20450 dest_gpr_p = INT_REGNO_P (dest_regno);
20451 dest_fp_p = FP_REGNO_P (dest_regno);
20452 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20453 dest_vsx_p = dest_fp_p | dest_vmx_p;
20458 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
/* Same classification for the source.  */
20463 src_regno = REGNO (src);
20464 src_gpr_p = INT_REGNO_P (src_regno);
20465 src_fp_p = FP_REGNO_P (src_regno);
20466 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20467 src_vsx_p = src_fp_p | src_vmx_p;
20472 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20475 /* Register moves. */
20476 if (dest_regno >= 0 && src_regno >= 0)
20483 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20484 return (WORDS_BIG_ENDIAN
20485 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20486 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20488 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20492 else if (TARGET_VSX && dest_vsx_p)
20495 return "xxlor %x0,%x1,%x1";
20497 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20498 return (WORDS_BIG_ENDIAN
20499 ? "mtvsrdd %x0,%1,%L1"
20500 : "mtvsrdd %x0,%L1,%1");
20502 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20506 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20507 return "vor %0,%1,%1";
20509 else if (dest_fp_p && src_fp_p)
/* Loads: register destination, memory source.  */
20514 else if (dest_regno >= 0 && MEM_P (src))
20518 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20524 else if (TARGET_ALTIVEC && dest_vmx_p
20525 && altivec_indexed_or_indirect_operand (src, mode))
20526 return "lvx %0,%y1";
20528 else if (TARGET_VSX && dest_vsx_p)
20530 if (mode_supports_dq_form (mode)
20531 && quad_address_p (XEXP (src, 0), mode, true))
20532 return "lxv %x0,%1";
20534 else if (TARGET_P9_VECTOR)
20535 return "lxvx %x0,%y1";
20537 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20538 return "lxvw4x %x0,%y1";
20541 return "lxvd2x %x0,%y1";
20544 else if (TARGET_ALTIVEC && dest_vmx_p)
20545 return "lvx %0,%y1";
20547 else if (dest_fp_p)
/* Stores: memory destination, register source.  */
20552 else if (src_regno >= 0 && MEM_P (dest))
20556 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20557 return "stq %1,%0";
20562 else if (TARGET_ALTIVEC && src_vmx_p
20563 && altivec_indexed_or_indirect_operand (dest, mode))
20564 return "stvx %1,%y0";
20566 else if (TARGET_VSX && src_vsx_p)
20568 if (mode_supports_dq_form (mode)
20569 && quad_address_p (XEXP (dest, 0), mode, true))
20570 return "stxv %x1,%0";
20572 else if (TARGET_P9_VECTOR)
20573 return "stxvx %x1,%y0";
20575 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20576 return "stxvw4x %x1,%y0";
20579 return "stxvd2x %x1,%y0";
20582 else if (TARGET_ALTIVEC && src_vmx_p)
20583 return "stvx %1,%y0";
/* Constant sources.  */
20590 else if (dest_regno >= 0
20591 && (CONST_INT_P (src)
20592 || CONST_WIDE_INT_P (src)
20593 || CONST_DOUBLE_P (src)
20594 || GET_CODE (src) == CONST_VECTOR))
20599 else if ((dest_vmx_p && TARGET_ALTIVEC)
20600 || (dest_vsx_p && TARGET_VSX))
20601 return output_vec_const_move (operands);
20604 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20607 /* Validate a 128-bit move. */
/* True when at least one operand is a general-purpose-class register
   operand of the destination's mode.  */
20609 rs6000_move_128bit_ok_p (rtx operands[])
20611 machine_mode mode = GET_MODE (operands[0]);
20612 return (gpc_reg_operand (operands[0], mode)
20613 || gpc_reg_operand (operands[1], mode));
20616 /* Return true if a 128-bit move needs to be split. */
/* Only after reload, only for GPR-involving moves, and not when the
   quad load/store instructions can do it in one insn.  */
20618 rs6000_split_128bit_ok_p (rtx operands[])
20620 if (!reload_completed)
20623 if (!gpr_or_gpr_p (operands[0], operands[1]))
20626 if (quad_load_store_p (operands[0], operands[1]))
20633 /* Given a comparison operation, return the bit number in CCR to test. We
20634 know this is a valid comparison.
20636 SCC_P is 1 if this is for an scc. That means that %D will have been
20637 used instead of %C, so the bits will be in different places.
20639 Return -1 if OP isn't a valid comparison for some reason. */
20642 ccr_bit (rtx op, int scc_p)
20644 enum rtx_code code = GET_CODE (op);
20645 machine_mode cc_mode;
20650 if (!COMPARISON_P (op))
20653 reg = XEXP (op, 0);
20655 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
/* Each CR field is 4 bits wide; BASE_BIT is the first bit of the field
   holding this comparison.  */
20658 cc_mode = GET_MODE (reg);
20659 cc_regnum = REGNO (reg);
20660 base_bit = 4 * (cc_regnum - CR0_REGNO);
20662 validate_condition_mode (code, cc_mode);
20664 /* When generating a sCOND operation, only positive conditions are
20683 return scc_p ? base_bit + 3 : base_bit + 2;
20685 return base_bit + 2;
20686 case GT: case GTU: case UNLE:
20687 return base_bit + 1;
20688 case LT: case LTU: case UNGE:
20690 case ORDERED: case UNORDERED:
20691 return base_bit + 3;
20694 /* If scc, we will have done a cror to put the bit in the
20695 unordered position. So test that bit. For integer, this is ! LT
20696 unless this is an scc insn. */
20697 return scc_p ? base_bit + 3 : base_bit;
20700 return scc_p ? base_bit + 3 : base_bit + 1;
20707 /* Return the GOT register. */
20710 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20712 /* The second flow pass currently (June 1999) can't update
20713 regs_ever_live without disturbing other parts of the compiler, so
20714 update it here to make the prolog/epilogue code happy. */
20715 if (!can_create_pseudo_p ()
20716 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20717 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
/* Record that this function uses the PIC offset table register.  */
20719 crtl->uses_pic_offset_table = 1;
20721 return pic_offset_table_rtx;
20724 static rs6000_stack_t stack_info;
20726 /* Function to init struct machine_function.
20727 This will be called, via a pointer variable,
20728 from push_function_context. */
/* Resets the cached stack-frame info and returns a zeroed, GC-allocated
   machine_function.  */
20730 static struct machine_function *
20731 rs6000_init_machine_status (void)
20733 stack_info.reload_completed = 0;
20734 return ggc_cleared_alloc<machine_function> ();
/* True for a VOIDmode CONST_INT (a plain integer constant).  */
20737 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20739 /* Write out a function code label. */
/* On ABIs where FNAME does not already start with '.', an ABI-specific
   prefix is emitted first (e.g. the "L." internal-label prefix seen
   below); case labels for the switch are missing from this listing.  */
20742 rs6000_output_function_entry (FILE *file, const char *fname)
20744 if (fname[0] != '.')
20746 switch (DEFAULT_ABI)
20749 gcc_unreachable ();
20755 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20765 RS6000_OUTPUT_BASENAME (file, fname);
20768 /* Print an operand. Recognize special options, documented below. */
20771 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20772 only introduced by the linker, when applying the sda21
/* NOTE(review): the two pairs of definitions below are presumably the
   branches of an #if/#else on -msdata support; the preprocessor
   directive lines are missing from this listing.  */
20774 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20775 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20777 #define SMALL_DATA_RELOC "sda21"
20778 #define SMALL_DATA_REG 0
20782 print_operand (FILE *file, rtx x, int code)
20785 unsigned HOST_WIDE_INT uval;
20789 /* %a is output_address. */
20791 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20795 /* Like 'J' but get to the GT bit only. */
20796 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20798 output_operand_lossage ("invalid %%D value");
20802 /* Bit 1 is GT bit. */
20803 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20805 /* Add one for shift count in rlinm for scc. */
20806 fprintf (file, "%d", i + 1);
20810 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20813 output_operand_lossage ("invalid %%e value");
20818 if ((uval & 0xffff) == 0 && uval != 0)
20823 /* X is a CR register. Print the number of the EQ bit of the CR */
20824 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20825 output_operand_lossage ("invalid %%E value");
20827 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20831 /* X is a CR register. Print the shift count needed to move it
20832 to the high-order four bits. */
20833 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20834 output_operand_lossage ("invalid %%f value");
20836 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20840 /* Similar, but print the count for the rotate in the opposite
20842 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20843 output_operand_lossage ("invalid %%F value");
20845 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20849 /* X is a constant integer. If it is negative, print "m",
20850 otherwise print "z". This is to make an aze or ame insn. */
20851 if (!CONST_INT_P (x))
20852 output_operand_lossage ("invalid %%G value");
20853 else if (INTVAL (x) >= 0)
20860 /* If constant, output low-order five bits. Otherwise, write
20863 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20865 print_operand (file, x, 0);
20869 /* If constant, output low-order six bits. Otherwise, write
20872 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20874 print_operand (file, x, 0);
20878 /* Print `i' if this is a constant, else nothing. */
20884 /* Write the bit number in CCR for jump. */
20885 i = ccr_bit (x, 0);
20887 output_operand_lossage ("invalid %%j code");
20889 fprintf (file, "%d", i);
20893 /* Similar, but add one for shift count in rlinm for scc and pass
20894 scc flag to `ccr_bit'. */
20895 i = ccr_bit (x, 1);
20897 output_operand_lossage ("invalid %%J code");
20899 /* If we want bit 31, write a shift count of zero, not 32. */
20900 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20904 /* X must be a constant. Write the 1's complement of the
20907 output_operand_lossage ("invalid %%k value");
20909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20913 /* X must be a symbolic constant on ELF. Write an
20914 expression suitable for an 'addi' that adds in the low 16
20915 bits of the MEM. */
20916 if (GET_CODE (x) == CONST)
20918 if (GET_CODE (XEXP (x, 0)) != PLUS
20919 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20920 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20921 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20922 output_operand_lossage ("invalid %%K value");
20924 print_operand_address (file, x);
20925 fputs ("@l", file);
20928 /* %l is output_asm_label. */
20931 /* Write second word of DImode or DFmode reference. Works on register
20932 or non-indexed memory only. */
20934 fputs (reg_names[REGNO (x) + 1], file);
20935 else if (MEM_P (x))
20937 machine_mode mode = GET_MODE (x);
20938 /* Handle possible auto-increment. Since it is pre-increment and
20939 we have already done it, we can just use an offset of word. */
20940 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20941 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20942 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20944 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20945 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20948 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20952 if (small_data_operand (x, GET_MODE (x)))
20953 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20954 reg_names[SMALL_DATA_REG]);
20958 case 'N': /* Unused */
20959 /* Write the number of elements in the vector times 4. */
20960 if (GET_CODE (x) != PARALLEL)
20961 output_operand_lossage ("invalid %%N value");
20963 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20966 case 'O': /* Unused */
20967 /* Similar, but subtract 1 first. */
20968 if (GET_CODE (x) != PARALLEL)
20969 output_operand_lossage ("invalid %%O value");
20971 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20975 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20978 || (i = exact_log2 (INTVAL (x))) < 0)
20979 output_operand_lossage ("invalid %%p value");
20981 fprintf (file, "%d", i);
20985 /* The operand must be an indirect memory reference. The result
20986 is the register name. */
20987 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20988 || REGNO (XEXP (x, 0)) >= 32)
20989 output_operand_lossage ("invalid %%P value");
20991 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20995 /* This outputs the logical code corresponding to a boolean
20996 expression. The expression may have one or both operands
20997 negated (if one, only the first one). For condition register
20998 logical operations, it will also treat the negated
20999 CR codes as NOTs, but not handle NOTs of them. */
21001 const char *const *t = 0;
21003 enum rtx_code code = GET_CODE (x);
21004 static const char * const tbl[3][3] = {
21005 { "and", "andc", "nor" },
21006 { "or", "orc", "nand" },
21007 { "xor", "eqv", "xor" } };
21011 else if (code == IOR)
21013 else if (code == XOR)
21016 output_operand_lossage ("invalid %%q value");
21018 if (GET_CODE (XEXP (x, 0)) != NOT)
21022 if (GET_CODE (XEXP (x, 1)) == NOT)
21033 if (! TARGET_MFCRF)
21039 /* X is a CR register. Print the mask for `mtcrf'. */
21040 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
21041 output_operand_lossage ("invalid %%R value");
21043 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21047 /* Low 5 bits of 32 - value */
21049 output_operand_lossage ("invalid %%s value");
21051 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21055 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21056 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
21058 output_operand_lossage ("invalid %%t value");
21062 /* Bit 3 is OV bit. */
21063 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21065 /* If we want bit 31, write a shift count of zero, not 32. */
21066 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21070 /* Print the symbolic name of a branch target register. */
21071 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
21072 x = XVECEXP (x, 0, 0);
21073 if (!REG_P (x) || (REGNO (x) != LR_REGNO
21074 && REGNO (x) != CTR_REGNO))
21075 output_operand_lossage ("invalid %%T value");
21076 else if (REGNO (x) == LR_REGNO)
21077 fputs ("lr", file);
21079 fputs ("ctr", file);
21083 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21084 for use in unsigned operand. */
21087 output_operand_lossage ("invalid %%u value");
21092 if ((uval & 0xffff) == 0)
21095 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21099 /* High-order 16 bits of constant for use in signed operand. */
21101 output_operand_lossage ("invalid %%v value");
21103 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21104 (INTVAL (x) >> 16) & 0xffff);
21108 /* Print `u' if this has an auto-increment or auto-decrement. */
21110 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21111 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21112 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21117 /* Print the trap code for this operand. */
21118 switch (GET_CODE (x))
21121 fputs ("eq", file); /* 4 */
21124 fputs ("ne", file); /* 24 */
21127 fputs ("lt", file); /* 16 */
21130 fputs ("le", file); /* 20 */
21133 fputs ("gt", file); /* 8 */
21136 fputs ("ge", file); /* 12 */
21139 fputs ("llt", file); /* 2 */
21142 fputs ("lle", file); /* 6 */
21145 fputs ("lgt", file); /* 1 */
21148 fputs ("lge", file); /* 5 */
21151 output_operand_lossage ("invalid %%V value");
21156 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21159 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21160 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21162 print_operand (file, x, 0);
21166 /* X is a FPR or Altivec register used in a VSX context. */
21167 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
21168 output_operand_lossage ("invalid %%x value");
21171 int reg = REGNO (x);
21172 int vsx_reg = (FP_REGNO_P (reg)
21174 : reg - FIRST_ALTIVEC_REGNO + 32);
21176 #ifdef TARGET_REGNAMES
21177 if (TARGET_REGNAMES)
21178 fprintf (file, "%%vs%d", vsx_reg);
21181 fprintf (file, "%d", vsx_reg);
21187 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21188 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21189 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21194 /* Like 'L', for third word of TImode/PTImode */
21196 fputs (reg_names[REGNO (x) + 2], file);
21197 else if (MEM_P (x))
21199 machine_mode mode = GET_MODE (x);
21200 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21201 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21202 output_address (mode, plus_constant (Pmode,
21203 XEXP (XEXP (x, 0), 0), 8));
21204 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21205 output_address (mode, plus_constant (Pmode,
21206 XEXP (XEXP (x, 0), 0), 8));
21208 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21209 if (small_data_operand (x, GET_MODE (x)))
21210 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21211 reg_names[SMALL_DATA_REG]);
21216 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
21217 x = XVECEXP (x, 0, 1);
21218 /* X is a SYMBOL_REF. Write out the name preceded by a
21219 period and without any trailing data in brackets. Used for function
21220 names. If we are configured for System V (or the embedded ABI) on
21221 the PowerPC, do not emit the period, since those systems do not use
21222 TOCs and the like. */
21223 if (!SYMBOL_REF_P (x))
21225 output_operand_lossage ("invalid %%z value");
21229 /* For macho, check to see if we need a stub. */
21232 const char *name = XSTR (x, 0);
21234 if (darwin_emit_branch_islands
21235 && MACHOPIC_INDIRECT
21236 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21237 name = machopic_indirection_name (x, /*stub_p=*/true);
21239 assemble_name (file, name);
21241 else if (!DOT_SYMBOLS)
21242 assemble_name (file, XSTR (x, 0));
21244 rs6000_output_function_entry (file, XSTR (x, 0));
21248 /* Like 'L', for last word of TImode/PTImode. */
21250 fputs (reg_names[REGNO (x) + 3], file);
21251 else if (MEM_P (x))
21253 machine_mode mode = GET_MODE (x);
21254 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21255 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21256 output_address (mode, plus_constant (Pmode,
21257 XEXP (XEXP (x, 0), 0), 12));
21258 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21259 output_address (mode, plus_constant (Pmode,
21260 XEXP (XEXP (x, 0), 0), 12));
21262 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21263 if (small_data_operand (x, GET_MODE (x)))
21264 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21265 reg_names[SMALL_DATA_REG]);
21269 /* Print AltiVec memory operand. */
21274 gcc_assert (MEM_P (x));
21278 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
21279 && GET_CODE (tmp) == AND
21280 && CONST_INT_P (XEXP (tmp, 1))
21281 && INTVAL (XEXP (tmp, 1)) == -16)
21282 tmp = XEXP (tmp, 0);
21283 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21284 && GET_CODE (tmp) == PRE_MODIFY)
21285 tmp = XEXP (tmp, 1);
21287 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21290 if (GET_CODE (tmp) != PLUS
21291 || !REG_P (XEXP (tmp, 0))
21292 || !REG_P (XEXP (tmp, 1)))
21294 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21298 if (REGNO (XEXP (tmp, 0)) == 0)
21299 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21300 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21302 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21303 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21310 fprintf (file, "%s", reg_names[REGNO (x)]);
21311 else if (MEM_P (x))
21313 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21314 know the width from the mode. */
21315 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21316 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21317 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21318 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21319 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21320 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21321 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21322 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21324 output_address (GET_MODE (x), XEXP (x, 0));
21326 else if (toc_relative_expr_p (x, false,
21327 &tocrel_base_oac, &tocrel_offset_oac))
21328 /* This hack along with a corresponding hack in
21329 rs6000_output_addr_const_extra arranges to output addends
21330 where the assembler expects to find them. eg.
21331 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21332 without this hack would be output as "x@toc+4". We
21334 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21335 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
21336 output_addr_const (file, XVECEXP (x, 0, 0));
21337 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
21338 output_addr_const (file, XVECEXP (x, 0, 1));
21340 output_addr_const (file, x);
21344 if (const char *name = get_some_local_dynamic_name ())
21345 assemble_name (file, name);
21347 output_operand_lossage ("'%%&' used without any "
21348 "local dynamic TLS references");
21352 output_operand_lossage ("invalid %%xn code");
21356 /* Print the address of an operand. */
/* Emit the assembly syntax for address X to FILE:
   - bare register            -> "0(reg)"
   - symbol/const/label       -> symbolic output, plus a small-data
                                 relocation suffix when applicable
   - (plus reg reg)           -> "ra,rb" (swapped if ra is r0, since r0
                                 reads as literal zero in the base slot)
   - (plus reg const_int)     -> "disp(reg)"
   - LO_SUM / TOC-relative    -> low-part or TOC syntax.
   NOTE(review): this chunk is a sampled listing; braces, #if directives
   and some statements are not visible here.  */
21359 print_operand_address (FILE *file, rtx x)
21362 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21363 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
21364 || GET_CODE (x) == LABEL_REF)
21366 output_addr_const (file, x);
21367 if (small_data_operand (x, GET_MODE (x)))
21368 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21369 reg_names[SMALL_DATA_REG]);
/* A naked symbolic address is never legal when a TOC is in use.  */
21371 gcc_assert (!TARGET_TOC);
21373 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21374 && REG_P (XEXP (x, 1)))
/* If the first register is r0, print the other register first: r0 in
   the base-register position means constant 0, not GPR0.  */
21376 if (REGNO (XEXP (x, 0)) == 0)
21377 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21378 reg_names[ REGNO (XEXP (x, 0)) ]);
21380 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21381 reg_names[ REGNO (XEXP (x, 1)) ]);
21383 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21384 && CONST_INT_P (XEXP (x, 1)))
21385 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21386 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
/* Two LO_SUM arms follow with identical guards; presumably the first is
   the Darwin "lo16(sym)(reg)" spelling and the second the ELF "sym@l(reg)"
   spelling, separated by #if directives not visible in this listing —
   TODO confirm against the full source.  */
21388 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21389 && CONSTANT_P (XEXP (x, 1)))
21391 fprintf (file, "lo16(");
21392 output_addr_const (file, XEXP (x, 1));
21393 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21397 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21398 && CONSTANT_P (XEXP (x, 1)))
21400 output_addr_const (file, XEXP (x, 1));
21401 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21404 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21406 /* This hack along with a corresponding hack in
21407 rs6000_output_addr_const_extra arranges to output addends
21408 where the assembler expects to find them. eg.
21410 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21411 without this hack would be output as "x@toc+8@l(9)". We
21412 want "x+8@toc@l(9)". */
21413 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21414 if (GET_CODE (x) == LO_SUM)
21415 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
/* Not LO_SUM: the base register lives in element 1 of the tocrel unspec.  */
21417 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
/* Fallback: print the address as a bare constant expression.  */
21420 output_addr_const (file, x);
21423 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Handle the UNSPECs that can appear inside address constants:
   UNSPEC_TOCREL prints "sym[+off]" followed by the TOC syntax for the
   current object format; UNSPEC_MACHOPIC_OFFSET prints "sym - <pic base>".
   NOTE(review): sampled listing — return statements, braces and the
   Darwin-only #if guard are not visible here.  */
21426 rs6000_output_addr_const_extra (FILE *file, rtx x)
21428 if (GET_CODE (x) == UNSPEC)
21429 switch (XINT (x, 1))
21431 case UNSPEC_TOCREL:
/* Element 0 is the symbol, element 1 must be the TOC register.  */
21432 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21433 && REG_P (XVECEXP (x, 0, 1))
21434 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21435 output_addr_const (file, XVECEXP (x, 0, 0));
/* Emit any addend recorded by print_operand_address's tocrel hack, so
   it lands before the @toc suffix (see comment there).  */
21436 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21438 if (INTVAL (tocrel_offset_oac) >= 0)
21439 fprintf (file, "+");
21440 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac))
21442 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
/* Minimal-TOC / non-AIX: reference the TOC base label directly.  */
21445 assemble_name (file, toc_label_name);
21448 else if (TARGET_ELF)
21449 fputs ("@toc", file);
21453 case UNSPEC_MACHOPIC_OFFSET:
21454 output_addr_const (file, XVECEXP (x, 0, 0));
21456 machopic_output_function_base_name (file);
21463 /* Target hook for assembling integer objects. The PowerPC version has
21464 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21465 is defined. It also needs to handle DI-mode objects on 64-bit
/* For -mrelocatable V4 code, 4-byte aligned address constants get a
   ".fixup" section entry so the loader can relocate them at run time;
   everything else is delegated to default_assemble_integer.
   NOTE(review): sampled listing — the `recurse` guard logic and several
   conditions of the big `if` are not visible here.  */
21469 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21471 #ifdef RELOCATABLE_NEEDS_FIXUP
21472 /* Special handling for SI values. */
21473 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
/* Static flag; presumably prevents re-entry while we emit the fixup —
   TODO confirm its use against the full source.  */
21475 static int recurse = 0;
21477 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21478 the .fixup section. Since the TOC section is already relocated, we
21479 don't need to mark it here. We used to skip the text section, but it
21480 should never be valid for relocated addresses to be placed in the text
21482 if (DEFAULT_ABI == ABI_V4
21483 && (TARGET_RELOCATABLE || flag_pic > 1)
21484 && in_section != toc_section
21486 && !CONST_SCALAR_INT_P (x)
/* Emit "LCPn: .long (x)@fixup" at the current spot, then record LCPn's
   address in the .fixup section, and return to the previous section.  */
21492 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21494 ASM_OUTPUT_LABEL (asm_out_file, buf);
21495 fprintf (asm_out_file, "\t.long\t(");
21496 output_addr_const (asm_out_file, x);
21497 fprintf (asm_out_file, ")@fixup\n");
21498 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21499 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21500 fprintf (asm_out_file, "\t.long\t");
21501 assemble_name (asm_out_file, buf);
21502 fprintf (asm_out_file, "\n\t.previous\n");
21506 /* Remove initial .'s to turn a -mcall-aixdesc function
21507 address into the address of the descriptor, not the function
21509 else if (SYMBOL_REF_P (x)
21510 && XSTR (x, 0)[0] == '.'
21511 && DEFAULT_ABI == ABI_AIX)
21513 const char *name = XSTR (x, 0);
21514 while (*name == '.')
21517 fprintf (asm_out_file, "\t.long\t%s\n", name);
21521 #endif /* RELOCATABLE_NEEDS_FIXUP */
21522 return default_assemble_integer (x, size, aligned_p);
21525 /* Return a template string for assembly to emit when making an
21526 external call. FUNOP is the call mem argument operand number. */
/* Shared worker for rs6000_call_template / rs6000_sibcall_template.
   Builds "b"/"bl" (plus ABI-specific decoration: trailing nop for
   AIX/ELFv2, "@plt" for pic V4, Darwin "jbsr ..., L42" long calls) into a
   static buffer, so the returned pointer is only valid until the next
   call.  SIBCALL selects the no-link-register form.
   NOTE(review): sampled listing — declarations of `arg`, `z` and some
   braces are not visible here.  */
21528 static const char *
21529 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21531 /* -Wformat-overflow workaround, without which gcc thinks that %u
21532 might produce 10 digits. */
21533 gcc_assert (funop <= MAX_RECOG_OPERANDS);
/* A TLS call carries an UNSPEC marker in the following operand; turn it
   into the "(%N@tlsgd)" / "(%&@tlsld)" argument annotation.  */
21537 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21539 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21540 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21541 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21542 sprintf (arg, "(%%&@tlsld)");
21544 gcc_unreachable ();
21547 /* The magic 32768 offset here corresponds to the offset of
21548 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21550 sprintf (z, "%%z%u%s", funop,
21551 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21554 static char str[32]; /* 2 spare */
21555 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21556 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21557 sibcall ? "" : "\n\tnop");
21558 else if (DEFAULT_ABI == ABI_V4)
21559 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21560 flag_pic ? "@plt" : "");
21562 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
21563 else if (DEFAULT_ABI == ABI_DARWIN)
21565 /* The cookie is in operand func+2. */
21566 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT)
21567 int cookie = INTVAL (operands[funop + 2]);
21568 if (cookie & CALL_LONG)
21570 tree funname = get_identifier (XSTR (operands[funop], 0));
21571 tree labelname = get_prev_label (funname);
21572 gcc_checking_assert (labelname && !sibcall);
21574 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21575 instruction will reach 'foo', otherwise link as 'bl L42'".
21576 "L42" should be a 'branch island', that will do a far jump to
21577 'foo'. Branch islands are generated in
21578 macho_branch_islands(). */
21579 sprintf (str, "jbsr %%z%u,%.10s", funop,
21580 IDENTIFIER_POINTER (labelname));
21583 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
21585 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21589 gcc_unreachable ();
/* Public entry: template for a normal (non-sibling) external call.  */
21594 rs6000_call_template (rtx *operands, unsigned int funop)
21596 return rs6000_call_template_1 (operands, funop, false);
/* Public entry: template for a sibling (tail) external call.  */
21600 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21602 return rs6000_call_template_1 (operands, funop, true);
21605 /* As above, for indirect calls. */
/* Shared worker for the indirect-call templates.  Emits optional
   .reloc PLTSEQ/PLTCALL/TLS marker directives, the speculation barrier
   "crset 2" when indirect-branch speculation must be stopped, and the
   final bctr/bctrl (with ABI-dependent TOC save/restore).  Returns a
   pointer into a static buffer (valid until the next call).
   NOTE(review): sampled listing — several sprintf format strings and
   braces are not visible here.  */
21607 static const char *
21608 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21611 /* -Wformat-overflow workaround, without which gcc thinks that %u
21612 might produce 10 digits. Note that -Wformat-overflow will not
21613 currently warn here for str[], so do not rely on a warning to
21614 ensure str[] is correctly sized. */
21615 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21617 /* Currently, funop is either 0 or 1. The maximum string is always
21618 a !speculate 64-bit __tls_get_addr call.
21622 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21623 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21625 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21626 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21633 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21634 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21636 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21637 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21644 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21645 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21647 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21648 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21652 static char str[160]; /* 8 spare */
/* "d" (ld) for 64-bit, "wz" (lwz) for 32-bit pointer loads.  */
21654 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21656 if (DEFAULT_ABI == ABI_AIX)
21659 ptrload, funop + 2);
21661 /* We don't need the extra code to stop indirect call speculation if
/* Calls through the link register are returns in disguise; they are
   always safe to speculate.  */
21663 bool speculate = (TARGET_MACHO
21664 || rs6000_speculate_indirect_jumps
21665 || (REG_P (operands[funop])
21666 && REGNO (operands[funop]) == LR_REGNO));
21668 if (!TARGET_MACHO && HAVE_AS_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21670 const char *rel64 = TARGET_64BIT ? "64" : "";
21673 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21675 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21676 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21678 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21679 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21682 gcc_unreachable ();
/* Secure-PLT pic V4 addresses the PLT entry relative to LCTOC1 + 32768.  */
21685 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21686 && flag_pic == 2 ? "+32768" : "");
21690 "%s.reloc .,R_PPC%s_PLTSEQ,%%z%u%s\n\t",
21691 tls, rel64, funop, addend);
21692 s += sprintf (s, "crset 2\n\t");
21695 "%s.reloc .,R_PPC%s_PLTCALL,%%z%u%s\n\t",
21696 tls, rel64, funop, addend);
21698 else if (!speculate)
21699 s += sprintf (s, "crset 2\n\t");
/* ABI-specific TOC handling around the bctr/bctrl itself.  */
21701 if (DEFAULT_ABI == ABI_AIX)
21707 funop, ptrload, funop + 3);
21712 funop, ptrload, funop + 3);
21714 else if (DEFAULT_ABI == ABI_ELFv2)
21720 funop, ptrload, funop + 2);
21725 funop, ptrload, funop + 2);
21732 funop, sibcall ? "" : "l");
21736 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
/* Public entry: template for a normal indirect call.  */
21742 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21744 return rs6000_indirect_call_template_1 (operands, funop, false);
/* Public entry: template for a sibling (tail) indirect call.  */
21748 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21750 return rs6000_indirect_call_template_1 (operands, funop, true);
21754 /* Output indirect call insns.
21755 WHICH is 0 for tocsave, 1 for plt16_ha, 2 for plt16_lo, 3 for mtctr. */
/* Builds one step of an inline PLT call sequence (ELFv2 / SysV only):
   an optional TLS marker .reloc, then the PLTSEQ/PLT16 .reloc plus the
   matching instruction for step WHICH.  Returns a pointer into a static
   buffer (valid until the next call).
   NOTE(review): sampled listing — the switch on WHICH and some format
   strings are not visible here.  */
21757 rs6000_pltseq_template (rtx *operands, int which)
21759 const char *rel64 = TARGET_64BIT ? "64" : "";
21762 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21764 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21765 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%3\n\t",
21767 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21768 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21771 gcc_unreachable ();
21774 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21775 static char str[96]; /* 15 spare */
/* Big-endian PLT16 relocs apply to the low half of the instruction.  */
21776 const char *off = WORDS_BIG_ENDIAN ? "+2" : "";
21777 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21778 && flag_pic == 2 ? "+32768" : "");
/* tocsave step: store the TOC pointer to its stack slot.  */
21783 "%s.reloc .,R_PPC%s_PLTSEQ,%%z2\n\t"
21785 tls, rel64, TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)");
/* plt16_ha step: high-adjusted half of the PLT entry address.  */
21788 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21790 "%s.reloc .%s,R_PPC%s_PLT16_HA,%%z2\n\t"
21795 "%s.reloc .%s,R_PPC%s_PLT16_HA,%%z2%s\n\t"
21797 tls, off, rel64, addend);
/* plt16_lo step: low half; 64-bit uses the _DS form for ld.  */
21801 "%s.reloc .%s,R_PPC%s_PLT16_LO%s,%%z2%s\n\t"
21803 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend,
21804 TARGET_64BIT ? "d" : "wz");
/* mtctr step: move the resolved address into CTR.  */
21808 "%s.reloc .,R_PPC%s_PLTSEQ,%%z2%s\n\t"
21810 tls, rel64, addend);
21813 gcc_unreachable ();
21819 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21820 /* Emit an assembler directive to set symbol visibility for DECL to
21821 VISIBILITY_TYPE. */
/* On AIX-style ABIs a function has two symbols — the descriptor "foo"
   and the entry point ".foo" — so both get the visibility directive;
   everything else falls through to the generic hook.  */
21824 rs6000_assemble_visibility (tree decl, int vis)
21829 /* Functions need to have their entry point symbol visibility set as
21830 well as their descriptor symbol visibility. */
21831 if (DEFAULT_ABI == ABI_AIX
21833 && TREE_CODE (decl) == FUNCTION_DECL)
/* Indexed by the VISIBILITY_* enum; entry 0 (default) is unused here.  */
21835 static const char * const visibility_types[] = {
21836 NULL, "protected", "hidden", "internal"
21839 const char *name, *type;
21841 name = ((* targetm.strip_name_encoding)
21842 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21843 type = visibility_types[vis];
21845 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21846 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21849 default_assemble_visibility (decl, vis);
/* Return the rtx code for the logical inverse of CODE under MODE.
   FP compares (CCFPmode) must use the maybe-unordered reversal unless
   -ffinite-math-only guarantees no NaNs and CODE is not one of the
   explicitly unordered codes.  */
21854 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21856 /* Reversal of FP compares takes care -- an ordered compare
21857 becomes an unordered compare and vice versa. */
21858 if (mode == CCFPmode
21859 && (!flag_finite_math_only
21860 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21861 || code == UNEQ || code == LTGT))
21862 return reverse_condition_maybe_unordered (code);
21864 return reverse_condition (code);
21867 /* Generate a compare for CODE. Return a brand-new rtx that
21868 represents the result of the compare. */
/* CMP is a comparison rtx (code, op0, op1); MODE is the mode of the
   operands.  Emits the compare insn(s) and returns a fresh
   (code (reg:CC) 0) rtx for use in a conditional branch or set.
   Chooses the CC mode (CCUNSmode for unsigned, CCFPmode for FP, CCmode
   otherwise), routes no-hardware IEEE-128 compares through libcalls,
   and post-processes FP codes that need a CR-OR (LE, GE, UNEQ, LTGT,
   UNGT, UNLT).
   NOTE(review): sampled listing — case labels, braces and some
   statements are not visible here.  */
21871 rs6000_generate_compare (rtx cmp, machine_mode mode)
21873 machine_mode comp_mode;
21874 rtx compare_result;
21875 enum rtx_code code = GET_CODE (cmp);
21876 rtx op0 = XEXP (cmp, 0);
21877 rtx op1 = XEXP (cmp, 1);
21879 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21880 comp_mode = CCmode;
21881 else if (FLOAT_MODE_P (mode))
21882 comp_mode = CCFPmode;
21883 else if (code == GTU || code == LTU
21884 || code == GEU || code == LEU)
21885 comp_mode = CCUNSmode;
21886 else if ((code == EQ || code == NE)
21887 && unsigned_reg_p (op0)
21888 && (unsigned_reg_p (op1)
21889 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21890 /* These are unsigned values, perhaps there will be a later
21891 ordering compare that can be shared with this one. */
21892 comp_mode = CCUNSmode;
21894 comp_mode = CCmode;
21896 /* If we have an unsigned compare, make sure we don't have a signed value as
21898 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21899 && INTVAL (op1) < 0)
21901 op0 = copy_rtx_if_shared (op0);
21902 op1 = force_reg (GET_MODE (op0), op1);
21903 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21906 /* First, the compare. */
21907 compare_result = gen_reg_rtx (comp_mode);
21909 /* IEEE 128-bit support in VSX registers when we do not have hardware
21911 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21913 rtx libfunc = NULL_RTX;
21914 bool check_nan = false;
/* Map the rtx code to the soft-float library comparison and the integer
   condition to apply to its result.  */
21921 libfunc = optab_libfunc (eq_optab, mode);
21926 libfunc = optab_libfunc (ge_optab, mode);
21931 libfunc = optab_libfunc (le_optab, mode);
21936 libfunc = optab_libfunc (unord_optab, mode);
21937 code = (code == UNORDERED) ? NE : EQ;
21943 libfunc = optab_libfunc (ge_optab, mode);
21944 code = (code == UNGE) ? GE : GT;
21950 libfunc = optab_libfunc (le_optab, mode);
21951 code = (code == UNLE) ? LE : LT;
21957 libfunc = optab_libfunc (eq_optab, mode);
/* Was "code = (code = UNEQ) ? EQ : NE;": the assignment made the
   condition always true, so LTGT wrongly became EQ and CODE was
   clobbered.  Use equality, matching the UNORDERED/UNGE/UNLE cases.  */
21958 code = (code == UNEQ) ? EQ : NE;
21962 gcc_unreachable ();
21965 gcc_assert (libfunc);
21968 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21969 SImode, op0, mode, op1, mode);
21971 /* The library signals an exception for signalling NaNs, so we need to
21972 handle isgreater, etc. by first checking isordered. */
21975 rtx ne_rtx, normal_dest, unord_dest;
21976 rtx unord_func = optab_libfunc (unord_optab, mode);
21977 rtx join_label = gen_label_rtx ();
21978 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21979 rtx unord_cmp = gen_reg_rtx (comp_mode);
21982 /* Test for either value being a NaN. */
21983 gcc_assert (unord_func);
21984 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21985 SImode, op0, mode, op1, mode);
21987 /* Set value (0) if either value is a NaN, and jump to the join
21989 dest = gen_reg_rtx (SImode);
21990 emit_move_insn (dest, const1_rtx);
21991 emit_insn (gen_rtx_SET (unord_cmp,
21992 gen_rtx_COMPARE (comp_mode, unord_dest,
21995 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21996 emit_jump_insn (gen_rtx_SET (pc_rtx,
21997 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22001 /* Do the normal comparison, knowing that the values are not
22003 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22004 SImode, op0, mode, op1, mode);
22006 emit_insn (gen_cstoresi4 (dest,
22007 gen_rtx_fmt_ee (code, SImode, normal_dest,
22009 normal_dest, const0_rtx));
22011 /* Join NaN and non-Nan paths. Compare dest against 0. */
22012 emit_label (join_label);
22016 emit_insn (gen_rtx_SET (compare_result,
22017 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22022 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22023 CLOBBERs to match cmptf_internal2 pattern. */
22024 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22025 && FLOAT128_IBM_P (GET_MODE (op0))
22026 && TARGET_HARD_FLOAT)
22027 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22029 gen_rtx_SET (compare_result,
22030 gen_rtx_COMPARE (comp_mode, op0, op1)),
22031 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22032 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22033 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22034 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22035 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22037 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22038 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22039 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
/* Stack-protector test: the canary compare must use the dedicated
   pattern so it cannot be optimized into revealing the canary.  */
22040 else if (GET_CODE (op1) == UNSPEC
22041 && XINT (op1, 1) == UNSPEC_SP_TEST)
22043 rtx op1b = XVECEXP (op1, 0, 0);
22044 comp_mode = CCEQmode;
22045 compare_result = gen_reg_rtx (CCEQmode);
22047 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22049 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22052 emit_insn (gen_rtx_SET (compare_result,
22053 gen_rtx_COMPARE (comp_mode, op0, op1)));
22056 /* Some kinds of FP comparisons need an OR operation;
22057 under flag_finite_math_only we don't bother. */
22058 if (FLOAT_MODE_P (mode)
22059 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22060 && !flag_finite_math_only
22061 && (code == LE || code == GE
22062 || code == UNEQ || code == LTGT
22063 || code == UNGT || code == UNLT))
22065 enum rtx_code or1, or2;
22066 rtx or1_rtx, or2_rtx, compare2_rtx;
22067 rtx or_result = gen_reg_rtx (CCEQmode);
/* Decompose CODE into two simple CR bits to be ORed together.  */
22071 case LE: or1 = LT; or2 = EQ; break;
22072 case GE: or1 = GT; or2 = EQ; break;
22073 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22074 case LTGT: or1 = LT; or2 = GT; break;
22075 case UNGT: or1 = UNORDERED; or2 = GT; break;
22076 case UNLT: or1 = UNORDERED; or2 = LT; break;
22077 default: gcc_unreachable ();
22079 validate_condition_mode (or1, comp_mode);
22080 validate_condition_mode (or2, comp_mode);
22081 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22082 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22083 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22084 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22086 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22088 compare_result = or_result;
22092 validate_condition_mode (code, GET_MODE (compare_result));
22094 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
22098 /* Return the diagnostic message string if the binary operation OP is
22099 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Rejects mixing IEEE 128-bit (KFmode / IEEE TFmode) with IBM extended
   double (IFmode / IBM TFmode) in one expression unless
   -mfloat128-convert is enabled.  Complex types are judged by their
   component mode.  */
22102 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22106 machine_mode mode1 = TYPE_MODE (type1);
22107 machine_mode mode2 = TYPE_MODE (type2);
22109 /* For complex modes, use the inner type. */
22110 if (COMPLEX_MODE_P (mode1))
22111 mode1 = GET_MODE_INNER (mode1);
22113 if (COMPLEX_MODE_P (mode2))
22114 mode2 = GET_MODE_INNER (mode2);
22116 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22117 double to intermix unless -mfloat128-convert. */
22118 if (mode1 == mode2)
22121 if (!TARGET_FLOAT128_CVT)
22123 if ((mode1 == KFmode && mode2 == IFmode)
22124 || (mode1 == IFmode && mode2 == KFmode))
22125 return N_("__float128 and __ibm128 cannot be used in the same "
/* With -mabi=ieeelongdouble, TFmode is IEEE, so IFmode clashes with it.  */
22128 if (TARGET_IEEEQUAD
22129 && ((mode1 == IFmode && mode2 == TFmode)
22130 || (mode1 == TFmode && mode2 == IFmode)))
22131 return N_("__ibm128 and long double cannot be used in the same "
/* With IBM long double, KFmode (always IEEE) clashes with TFmode.  */
22134 if (!TARGET_IEEEQUAD
22135 && ((mode1 == KFmode && mode2 == TFmode)
22136 || (mode1 == TFmode && mode2 == KFmode)))
22137 return N_("__float128 and long double cannot be used in the same "
22145 /* Expand floating point conversion to/from __float128 and __ibm128. */
/* Converts SRC to DEST where at least one side is a 128-bit float mode.
   Strategy, in order: direct move for same-format 128-bit modes, a
   single hardware insn from the hw_conversions table when
   -mfloat128-hardware, otherwise a library call located through the
   convert optab.  UNSIGNED_P selects unsigned integer conversions.
   NOTE(review): sampled listing — the switches on src/dest mode and
   several assignments (e.g. of kf_or_tf, do_move) are not visible.  */
22148 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22150 machine_mode dest_mode = GET_MODE (dest);
22151 machine_mode src_mode = GET_MODE (src);
22152 convert_optab cvt = unknown_optab;
22153 bool do_move = false;
22154 rtx libfunc = NULL_RTX;
22156 typedef rtx (*rtx_2func_t) (rtx, rtx);
22157 rtx_2func_t hw_convert = (rtx_2func_t)0;
/* Per-direction hardware insn generators; row 0 is KFmode, row 1 TFmode.  */
22161 rtx_2func_t from_df;
22162 rtx_2func_t from_sf;
22163 rtx_2func_t from_si_sign;
22164 rtx_2func_t from_si_uns;
22165 rtx_2func_t from_di_sign;
22166 rtx_2func_t from_di_uns;
22169 rtx_2func_t to_si_sign;
22170 rtx_2func_t to_si_uns;
22171 rtx_2func_t to_di_sign;
22172 rtx_2func_t to_di_uns;
22173 } hw_conversions[2] = {
22174 /* convertions to/from KFmode */
22176 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22177 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22178 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22179 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22180 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22181 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22182 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22183 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22184 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22185 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22186 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22187 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22190 /* convertions to/from TFmode */
22192 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22193 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22194 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22195 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22196 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22197 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22198 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22199 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22200 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22201 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22202 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22203 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22207 if (dest_mode == src_mode)
22208 gcc_unreachable ();
22210 /* Eliminate memory operations. */
22212 src = force_reg (src_mode, src);
/* Memory destination: convert into a temp, then store.  */
22216 rtx tmp = gen_reg_rtx (dest_mode);
22217 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22218 rs6000_emit_move (dest, tmp, dest_mode);
22222 /* Convert to IEEE 128-bit floating point. */
22223 if (FLOAT128_IEEE_P (dest_mode))
22225 if (dest_mode == KFmode)
22227 else if (dest_mode == TFmode)
22230 gcc_unreachable ();
22236 hw_convert = hw_conversions[kf_or_tf].from_df;
22241 hw_convert = hw_conversions[kf_or_tf].from_sf;
22247 if (FLOAT128_IBM_P (src_mode))
22256 cvt = ufloat_optab;
22257 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22261 cvt = sfloat_optab;
22262 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22269 cvt = ufloat_optab;
22270 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22274 cvt = sfloat_optab;
22275 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22280 gcc_unreachable ();
22284 /* Convert from IEEE 128-bit floating point. */
22285 else if (FLOAT128_IEEE_P (src_mode))
22287 if (src_mode == KFmode)
22289 else if (src_mode == TFmode)
22292 gcc_unreachable ();
22298 hw_convert = hw_conversions[kf_or_tf].to_df;
22303 hw_convert = hw_conversions[kf_or_tf].to_sf;
22309 if (FLOAT128_IBM_P (dest_mode))
22319 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22324 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22332 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22337 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22342 gcc_unreachable ();
22346 /* Both IBM format. */
22347 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22351 gcc_unreachable ();
22353 /* Handle conversion between TFmode/KFmode/IFmode. */
22355 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22357 /* Handle conversion if we have hardware support. */
22358 else if (TARGET_FLOAT128_HW && hw_convert)
22359 emit_insn ((hw_convert) (dest, src));
22361 /* Call an external function to do the conversion. */
22362 else if (cvt != unknown_optab)
22364 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22365 gcc_assert (libfunc != NULL_RTX);
22367 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
/* The libcall may return its value in a different register.  */
22370 gcc_assert (dest2 != NULL_RTX);
22371 if (!rtx_equal_p (dest, dest2))
22372 emit_move_insn (dest, dest2);
22376 gcc_unreachable ();
22382 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22383 can be used as that dest register. Return the dest register. */
22386 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22388 if (op2 == const0_rtx)
/* NOTE(review): the OP2 == 0 branch body is elided in this excerpt;
   presumably OP1 is returned/used directly since x ^ 0 == x -- confirm.  */
/* Caller may pass a bare (scratch); materialize a fresh pseudo to hold
   the result in that case.  */
22391 if (GET_CODE (scratch) == SCRATCH)
22392 scratch = gen_reg_rtx (mode);
/* If OP2 fits a logical immediate, XOR gives zero exactly when
   OP1 == OP2; otherwise compute OP1 - OP2 (as PLUS of the negation).  */
22394 if (logical_operand (op2, mode))
22395 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22397 emit_insn (gen_rtx_SET (scratch,
22398 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
/* Expand a setcc: store into operands[0] the boolean result of the
   comparison operands[1] (mode MODE).  */
22404 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22407 machine_mode op_mode;
22408 enum rtx_code cond_code;
22409 rtx result = operands[0];
22411 condition_rtx = rs6000_generate_compare (operands[1], mode);
22412 cond_code = GET_CODE (condition_rtx);
/* Condition codes the CR bit cannot express directly are handled by
   materializing the reversed condition into a CCEQ compare and then
   testing that result for EQ against zero.  */
22414 if (cond_code == NE
22415 || cond_code == GE || cond_code == LE
22416 || cond_code == GEU || cond_code == LEU
22417 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22419 rtx not_result = gen_reg_rtx (CCEQmode);
22420 rtx not_op, rev_cond_rtx;
22421 machine_mode cc_mode;
22423 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22425 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22426 SImode, XEXP (condition_rtx, 0), const0_rtx);
22427 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22428 emit_insn (gen_rtx_SET (not_result, not_op));
22429 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
/* Determine the mode of the compared operands; either side may be a
   VOIDmode constant, so fall back to the other operand.  */
22432 op_mode = GET_MODE (XEXP (operands[1], 0));
22433 if (op_mode == VOIDmode)
22434 op_mode = GET_MODE (XEXP (operands[1], 1));
/* On 64-bit targets with DImode operands (or FP compares) produce the
   result via convert_move in DImode; otherwise set it directly in
   SImode.  */
22436 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22438 PUT_MODE (condition_rtx, DImode);
22439 convert_move (result, condition_rtx, 0);
22443 PUT_MODE (condition_rtx, SImode);
22444 emit_insn (gen_rtx_SET (result, condition_rtx))
22448 /* Emit a branch of kind CODE to location LOC. */
/* operands[0] is the comparison expression; operands[3] is the target
   label.  Emits (set pc (if_then_else cond (label_ref) pc)).  */
22451 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22453 rtx condition_rtx, loc_ref;
22455 condition_rtx = rs6000_generate_compare (operands[0], mode);
22456 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22457 emit_jump_insn (gen_rtx_SET (pc_rtx,
22458 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22459 loc_ref, pc_rtx)));
22462 /* Return the string to output a conditional branch to LABEL, which is
22463 the operand template of the label, or NULL if the branch is really a
22464 conditional return.
22466 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22467 condition code register and its mode specifies what kind of
22468 comparison we made.
22470 REVERSED is nonzero if we should reverse the sense of the comparison.
22472 INSN is the insn. */
22475 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
/* NOTE: result is built in a static buffer, so the returned string is
   only valid until the next call (standard for output templates).  */
22477 static char string[64];
22478 enum rtx_code code = GET_CODE (op);
22479 rtx cc_reg = XEXP (op, 0);
22480 machine_mode mode = GET_MODE (cc_reg);
22481 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
/* Length attribute of 8 means the conditional branch cannot reach the
   target directly; we then emit an inverted short branch around an
   unconditional long branch (see the tail of this function).  */
22482 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22483 int really_reversed = reversed ^ need_longbranch;
22489 validate_condition_mode (code, mode);
22491 /* Work out which way this really branches. We could use
22492 reverse_condition_maybe_unordered here always but this
22493 makes the resulting assembler clearer. */
22494 if (really_reversed)
22496 /* Reversal of FP compares takes care -- an ordered compare
22497 becomes an unordered compare and vice versa. */
22498 if (mode == CCFPmode)
22499 code = reverse_condition_maybe_unordered (code);
22501 code = reverse_condition (code);
/* Map the rtx comparison code to the PowerPC branch mnemonic suffix.  */
22506 /* Not all of these are actually distinct opcodes, but
22507 we distinguish them for clarity of the resulting assembler. */
22508 case NE: case LTGT:
22509 ccode = "ne"; break;
22510 case EQ: case UNEQ:
22511 ccode = "eq"; break;
22513 ccode = "ge"; break;
22514 case GT: case GTU: case UNGT:
22515 ccode = "gt"; break;
22517 ccode = "le"; break;
22518 case LT: case LTU: case UNLT:
22519 ccode = "lt"; break;
22520 case UNORDERED: ccode = "un"; break;
22521 case ORDERED: ccode = "nu"; break;
22522 case UNGE: ccode = "nl"; break;
22523 case UNLE: ccode = "ng"; break;
22525 gcc_unreachable ();
22528 /* Maybe we have a guess as to how likely the branch is. */
22530 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22531 if (note != NULL_RTX)
22533 /* PROB is the difference from 50%. */
22534 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22535 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22537 /* Only hint for highly probable/improbable branches on newer cpus when
22538 we have real profile data, as static prediction overrides processor
22539 dynamic prediction. For older cpus we may as well always hint, but
22540 assume not taken for branches that are very close to 50% as a
22541 mispredicted taken branch is more expensive than a
22542 mispredicted not-taken branch. */
22543 if (rs6000_always_hint
22544 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22545 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22546 && br_prob_note_reliable_p (note)))
22548 if (abs (prob) > REG_BR_PROB_BASE / 20
22549 && ((prob > 0) ^ need_longbranch))
/* NULL label means a conditional return: use b<cc>lr; otherwise a
   plain conditional branch.  PRED carries the +/- prediction hint
   (its assignment is elided in this excerpt -- confirm).  */
22557 s += sprintf (s, "b%slr%s ", ccode, pred);
22559 s += sprintf (s, "b%s%s ", ccode, pred);
22561 /* We need to escape any '%' characters in the reg_names string.
22562 Assume they'd only be the first character.... */
22563 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22565 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22569 /* If the branch distance was too far, we may have to use an
22570 unconditional branch to go the distance. */
22571 if (need_longbranch)
22572 s += sprintf (s, ",$+8\n\tb %s", label);
22574 s += sprintf (s, ",%s", label);
22580 /* Return insn for VSX or Altivec comparisons. */
/* Emits a single vector compare (CODE OP0 OP1) into a fresh mask
   register of the operand mode and returns it; the elided portions
   presumably filter out codes the hardware cannot do directly.  */
22583 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22586 machine_mode mode = GET_MODE (op0);
22594 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22605 mask = gen_reg_rtx (mode);
22606 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22613 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22614 DMODE is expected destination mode. This is a recursive function. */
22617 rs6000_emit_vector_compare (enum rtx_code rcode,
22619 machine_mode dmode)
22622 bool swap_operands = false;
22623 bool try_again = false;
22625 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22626 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22628 /* See if the comparison works as is. */
22629 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
/* Direct compare failed: try rewriting the condition.  Some codes are
   handled by swapping the operands (e.g. LT -> GT).  */
22637 swap_operands = true;
22642 swap_operands = true;
22650 /* Invert condition and try again.
22651 e.g., A != B becomes ~(A==B). */
22653 enum rtx_code rev_code;
22654 enum insn_code nor_code;
22657 rev_code = reverse_condition_maybe_unordered (rcode);
22658 if (rev_code == UNKNOWN)
22661 nor_code = optab_handler (one_cmpl_optab, dmode);
22662 if (nor_code == CODE_FOR_nothing)
/* Recurse on the reversed condition, then complement the mask.  */
22665 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22669 mask = gen_reg_rtx (dmode);
22670 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22678 /* Try GT/GTU/LT/LTU OR EQ */
22681 enum insn_code ior_code;
22682 enum rtx_code new_code;
22703 gcc_unreachable ();
22706 ior_code = optab_handler (ior_optab, dmode);
22707 if (ior_code == CODE_FOR_nothing)
/* Decompose e.g. GE into (GT OR EQ) via two recursive compares.  */
22710 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22714 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22718 mask = gen_reg_rtx (dmode);
22719 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
/* Retry once after swapping operands / adjusting the code.  */
22730 std::swap (op0, op1);
22732 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22737 /* You only get two chances. */
22741 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22742 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22743 operands for the relation operation COND. */
22746 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22747 rtx cond, rtx cc_op0, rtx cc_op1)
22749 machine_mode dest_mode = GET_MODE (dest);
22750 machine_mode mask_mode = GET_MODE (cc_op0);
22751 enum rtx_code rcode = GET_CODE (cond);
22752 machine_mode cc_mode = CCmode;
22755 bool invert_move = false;
22757 if (VECTOR_UNIT_NONE_P (dest_mode))
/* The compare and select modes must describe the same number/size of
   elements even if their element types differ.  */
22760 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22761 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22765 /* Swap operands if we can, and fall back to doing the operation as
22766 specified, and doing a NOR to invert the test. */
22772 /* Invert condition and try again.
22773 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22774 invert_move = true;
22775 rcode = reverse_condition_maybe_unordered (rcode);
22776 if (rcode == UNKNOWN)
22782 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22784 /* Invert condition to avoid compound test. */
22785 invert_move = true;
22786 rcode = reverse_condition (rcode);
22794 /* Mark unsigned tests with CCUNSmode. */
22795 cc_mode = CCUNSmode;
22797 /* Invert condition to avoid compound test if necessary. */
22798 if (rcode == GEU || rcode == LEU)
22800 invert_move = true;
22801 rcode = reverse_condition (rcode);
22809 /* Get the vector mask for the given relational operations. */
22810 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
/* If we reversed the condition above, exchange the select arms so the
   overall semantics are preserved.  */
22816 std::swap (op_true, op_false);
22818 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
22819 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22820 && (GET_CODE (op_true) == CONST_VECTOR
22821 || GET_CODE (op_false) == CONST_VECTOR))
22823 rtx constant_0 = CONST0_RTX (dest_mode);
22824 rtx constant_m1 = CONSTM1_RTX (dest_mode);
/* select(mask, -1, 0) is just the mask; select(mask, 0, -1) is its
   complement -- no vsel needed in either case.  */
22826 if (op_true == constant_m1 && op_false == constant_0)
22828 emit_move_insn (dest, mask);
22832 else if (op_true == constant_0 && op_false == constant_m1)
22834 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22838 /* If we can't use the vector comparison directly, perhaps we can use
22839 the mask for the true or false fields, instead of loading up a
22841 if (op_true == constant_m1)
22844 if (op_false == constant_0)
/* The select arms must be registers (or subregs) for the IF_THEN_ELSE
   pattern below.  */
22848 if (!REG_P (op_true) && !SUBREG_P (op_true))
22849 op_true = force_reg (dest_mode, op_true);
22851 if (!REG_P (op_false) && !SUBREG_P (op_false))
22852 op_false = force_reg (dest_mode, op_false);
22854 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22855 CONST0_RTX (dest_mode));
22856 emit_insn (gen_rtx_SET (dest,
22857 gen_rtx_IF_THEN_ELSE (dest_mode,
22864 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22865 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
22866 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22867 hardware has no such operation. */
22870 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22872 enum rtx_code code = GET_CODE (op);
22873 rtx op0 = XEXP (op, 0);
22874 rtx op1 = XEXP (op, 1);
22875 machine_mode compare_mode = GET_MODE (op0);
22876 machine_mode result_mode = GET_MODE (dest);
22877 bool max_p = false;
/* Only handles the case where compare and result modes agree and the
   select arms are exactly the compared operands (a min/max idiom).  */
22879 if (result_mode != compare_mode)
22882 if (code == GE || code == GT)
22884 else if (code == LE || code == LT)
/* (a >= b ? a : b) is max; the mirrored arms give min.  */
22889 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22892 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22898 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22902 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22903 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
22904 operands of the last comparison is nonzero/true, FALSE_COND if it is
22905 zero/false. Return 0 if the hardware has no such operation. */
22908 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22910 enum rtx_code code = GET_CODE (op);
22911 rtx op0 = XEXP (op, 0);
22912 rtx op1 = XEXP (op, 1);
22913 machine_mode result_mode = GET_MODE (dest);
/* Fresh pseudos are required below; bail out after reload.  */
22918 if (!can_create_pseudo_p ())
/* Canonicalize the condition (elided cases presumably map codes the
   compare insns lack onto the supported set by swapping -- confirm).  */
22931 code = swap_condition (code);
22932 std::swap (op0, op1);
22939 /* Generate: [(parallel [(set (dest)
22940 (if_then_else (op (cmp1) (cmp2))
22943 (clobber (scratch))])]. */
22945 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22946 cmove_rtx = gen_rtx_SET (dest,
22947 gen_rtx_IF_THEN_ELSE (result_mode,
/* The pattern clobbers a V2DImode scratch used for the compare mask.  */
22952 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22953 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22954 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22959 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
22960 operands of the last comparison is nonzero/true, FALSE_COND if it
22961 is zero/false. Return 0 if the hardware has no such operation. */
22964 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22966 enum rtx_code code = GET_CODE (op);
22967 rtx op0 = XEXP (op, 0);
22968 rtx op1 = XEXP (op, 1);
22969 machine_mode compare_mode = GET_MODE (op0);
22970 machine_mode result_mode = GET_MODE (dest);
22972 bool is_against_zero;
22974 /* These modes should always match. */
22975 if (GET_MODE (op1) != compare_mode
22976 /* In the isel case however, we can use a compare immediate, so
22977 op1 may be a small constant. */
22978 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22980 if (GET_MODE (true_cond) != result_mode)
22982 if (GET_MODE (false_cond) != result_mode)
22985 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22986 if (TARGET_P9_MINMAX
22987 && (compare_mode == SFmode || compare_mode == DFmode)
22988 && (result_mode == SFmode || result_mode == DFmode))
22990 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22993 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22997 /* Don't allow using floating point comparisons for integer results for
22999 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23002 /* First, work out if the hardware can do this at all, or
23003 if it's too slow.... */
23004 if (!FLOAT_MODE_P (compare_mode))
23007 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23011 is_against_zero = op1 == CONST0_RTX (compare_mode);
23013 /* A floating-point subtract might overflow, underflow, or produce
23014 an inexact result, thus changing the floating-point flags, so it
23015 can't be generated if we care about that. It's safe if one side
23016 of the construct is zero, since then no subtract will be
23018 if (SCALAR_FLOAT_MODE_P (compare_mode)
23019 && flag_trapping_math && ! is_against_zero)
23022 /* Eliminate half of the comparisons by switching operands, this
23023 makes the remaining code simpler. */
23024 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23025 || code == LTGT || code == LT || code == UNLE)
23027 code = reverse_condition_maybe_unordered (code);
23029 true_cond = false_cond;
23033 /* UNEQ and LTGT take four instructions for a comparison with zero,
23034 it'll probably be faster to use a branch here too. */
23035 if (code == UNEQ && HONOR_NANS (compare_mode))
23038 /* We're going to try to implement comparisons by performing
23039 a subtract, then comparing against zero. Unfortunately,
23040 Inf - Inf is NaN which is not zero, and so if we don't
23041 know that the operand is finite and the comparison
23042 would treat EQ different to UNORDERED, we can't do it. */
23043 if (HONOR_INFINITIES (compare_mode)
23044 && code != GT && code != UNGE
23045 && (!CONST_DOUBLE_P (op1)
23046 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23047 /* Constructs of the form (a OP b ? a : b) are safe. */
23048 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23049 || (! rtx_equal_p (op0, true_cond)
23050 && ! rtx_equal_p (op1, true_cond))))
23053 /* At this point we know we can use fsel. */
23055 /* Reduce the comparison to a comparison against zero. */
23056 if (! is_against_zero)
23058 temp = gen_reg_rtx (compare_mode);
23059 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23061 op1 = CONST0_RTX (compare_mode);
23064 /* If we don't care about NaNs we can reduce some of the comparisons
23065 down to faster ones. */
23066 if (! HONOR_NANS (compare_mode))
23072 true_cond = false_cond;
23085 /* Now, reduce everything down to a GE. */
/* Each case below rewrites the condition as a GE-against-zero of some
   derived value (negation, absolute value, or negated absolute value),
   emitting extra fsel steps for the compound UNGE/GT cases.  */
23092 temp = gen_reg_rtx (compare_mode);
23093 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23098 temp = gen_reg_rtx (compare_mode);
23099 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23104 temp = gen_reg_rtx (compare_mode);
23105 emit_insn (gen_rtx_SET (temp,
23106 gen_rtx_NEG (compare_mode,
23107 gen_rtx_ABS (compare_mode, op0))));
23112 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23113 temp = gen_reg_rtx (result_mode);
23114 emit_insn (gen_rtx_SET (temp,
23115 gen_rtx_IF_THEN_ELSE (result_mode,
23116 gen_rtx_GE (VOIDmode,
23118 true_cond, false_cond)));
23119 false_cond = true_cond;
23122 temp = gen_reg_rtx (compare_mode);
23123 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23128 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23129 temp = gen_reg_rtx (result_mode);
23130 emit_insn (gen_rtx_SET (temp,
23131 gen_rtx_IF_THEN_ELSE (result_mode,
23132 gen_rtx_GE (VOIDmode,
23134 true_cond, false_cond)));
23135 true_cond = false_cond;
23138 temp = gen_reg_rtx (compare_mode);
23139 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23144 gcc_unreachable ();
/* Final fsel-style select on the canonicalized GE condition.  */
23147 emit_insn (gen_rtx_SET (dest,
23148 gen_rtx_IF_THEN_ELSE (result_mode,
23149 gen_rtx_GE (VOIDmode,
23151 true_cond, false_cond)));
23155 /* Same as above, but for ints (isel). */
23158 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23160 rtx condition_rtx, cr;
23161 machine_mode mode = GET_MODE (dest);
23162 enum rtx_code cond_code;
23163 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
/* isel only supports SImode, and DImode on 64-bit targets.  */
23166 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23169 /* We still have to do the compare, because isel doesn't do a
23170 compare, it just looks at the CRx bits set by a previous compare
23172 condition_rtx = rs6000_generate_compare (op, mode);
23173 cond_code = GET_CODE (condition_rtx);
23174 cr = XEXP (condition_rtx, 0);
/* CCmode means a signed compare; other CC modes are unsigned here.  */
23175 signedp = GET_MODE (cr) == CCmode;
23177 isel_func = (mode == SImode
23178 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23179 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23183 case LT: case GT: case LTU: case GTU: case EQ:
23184 /* isel handles these directly. */
23188 /* We need to swap the sense of the comparison. */
23190 std::swap (false_cond, true_cond);
23191 PUT_CODE (condition_rtx, reverse_condition (cond_code));
/* isel operands must be registers, except the zero register case.  */
23196 false_cond = force_reg (mode, false_cond);
23197 if (true_cond != const0_rtx)
23198 true_cond = force_reg (mode, true_cond);
23200 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
/* Emit DEST = min/max (OP0, OP1) according to CODE (SMIN/SMAX/UMIN/UMAX),
   using a direct vector min/max insn when available, otherwise a
   conditional move.  */
23206 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23208 machine_mode mode = GET_MODE (op0);
23212 /* VSX/altivec have direct min/max insns. */
23213 if ((code == SMAX || code == SMIN)
23214 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23215 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23217 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23221 if (code == SMAX || code == SMIN)
/* Fall back: max is (op0 <cond> op1 ? op0 : op1), min the mirror.  */
23226 if (code == SMAX || code == UMAX)
23227 target = emit_conditional_move (dest, c, op0, op1, mode,
23228 op0, op1, mode, 0);
23230 target = emit_conditional_move (dest, c, op0, op1, mode,
23231 op1, op0, mode, 0);
23232 gcc_assert (target);
23233 if (target != dest)
23234 emit_move_insn (dest, target);
23237 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23238 COND is true. Mark the jump as unlikely to be taken. */
23241 emit_unlikely_jump (rtx cond, rtx label)
23243 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23244 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
/* Attach a very-unlikely branch-probability note so later passes lay
   out the loop with the fallthrough path hot.  */
23245 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
23248 /* A subroutine of the atomic operation splitters. Emit a load-locked
23249 instruction in MODE. For QI/HImode, possibly use a pattern than includes
23250 the zero_extend operation. */
23253 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23255 rtx (*fn) (rtx, rtx) = NULL;
/* Select the lbarx/lharx/lwarx/ldarx/lqarx generator for MODE.  */
23260 fn = gen_load_lockedqi;
23263 fn = gen_load_lockedhi;
/* SImode destination but QI/HI memory: use the zero-extending variants.  */
23266 if (GET_MODE (mem) == QImode)
23267 fn = gen_load_lockedqi_si;
23268 else if (GET_MODE (mem) == HImode)
23269 fn = gen_load_lockedhi_si;
23271 fn = gen_load_lockedsi;
23274 fn = gen_load_lockeddi;
23277 fn = gen_load_lockedti;
23280 gcc_unreachable ();
23282 emit_insn (fn (reg, mem));
23285 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23286 instruction in MODE. */
23289 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23291 rtx (*fn) (rtx, rtx, rtx) = NULL;
/* Select the stbcx./sthcx./stwcx./stdcx./stqcx. generator for MODE.  */
23296 fn = gen_store_conditionalqi;
23299 fn = gen_store_conditionalhi;
23302 fn = gen_store_conditionalsi;
23305 fn = gen_store_conditionaldi;
23308 fn = gen_store_conditionalti;
23311 gcc_unreachable ();
23314 /* Emit sync before stwcx. to address PPC405 Erratum. */
23315 if (PPC405_ERRATUM77)
23316 emit_insn (gen_hwsync ());
23318 emit_insn (fn (res, mem, val));
23321 /* Expand barriers before and after a load_locked/store_cond sequence. */
/* Emit the leading fence required by MODEL and return MEM, possibly
   rewritten so its address is valid for the larx/stcx patterns.  */
23324 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23326 rtx addr = XEXP (mem, 0);
/* larx/stcx only accept reg-indirect or reg+reg addresses; force
   anything else into a register.  */
23328 if (!legitimate_indirect_address_p (addr, reload_completed)
23329 && !legitimate_indexed_address_p (addr, reload_completed))
23331 addr = force_reg (Pmode, addr);
23332 mem = replace_equiv_address_nv (mem, addr);
/* Relaxed/consume/acquire need no leading fence; release/acq_rel use
   lwsync; seq_cst requires the full hwsync.  */
23337 case MEMMODEL_RELAXED:
23338 case MEMMODEL_CONSUME:
23339 case MEMMODEL_ACQUIRE:
23341 case MEMMODEL_RELEASE:
23342 case MEMMODEL_ACQ_REL:
23343 emit_insn (gen_lwsync ());
23345 case MEMMODEL_SEQ_CST:
23346 emit_insn (gen_hwsync ());
23349 gcc_unreachable ();
/* Emit the trailing fence required by MODEL after a larx/stcx loop:
   acquire-and-stronger models need an isync; weaker models need
   nothing.  */
23355 rs6000_post_atomic_barrier (enum memmodel model)
23359 case MEMMODEL_RELAXED:
23360 case MEMMODEL_CONSUME:
23361 case MEMMODEL_RELEASE:
23363 case MEMMODEL_ACQUIRE:
23364 case MEMMODEL_ACQ_REL:
23365 case MEMMODEL_SEQ_CST:
23366 emit_insn (gen_isync ());
23369 gcc_unreachable ();
23373 /* A subroutine of the various atomic expanders. For sub-word operations,
23374 we must adjust things to operate on SImode. Given the original MEM,
23375 return a new aligned memory. Also build and return the quantities by
23376 which to shift and mask. */
23379 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23381 rtx addr, align, shift, mask, mem;
23382 HOST_WIDE_INT shift_mask;
23383 machine_mode mode = GET_MODE (orig_mem);
23385 /* For smaller modes, we have to implement this via SImode. */
/* 0x18/0x10 = max bit-shift of a byte/halfword within a 32-bit word.  */
23386 shift_mask = (mode == QImode ? 0x18 : 0x10);
23388 addr = XEXP (orig_mem, 0);
23389 addr = force_reg (GET_MODE (addr), addr);
23391 /* Aligned memory containing subword. Generate a new memory. We
23392 do not want any of the existing MEM_ATTR data, as we're now
23393 accessing memory outside the original object. */
23394 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23395 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23396 mem = gen_rtx_MEM (SImode, align);
23397 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23398 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23399 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23401 /* Shift amount for subword relative to aligned word. */
/* shift = (addr * 8) & shift_mask; on big-endian the byte order within
   the word is reversed, hence the XOR below.  */
23402 shift = gen_reg_rtx (SImode);
23403 addr = gen_lowpart (SImode, addr);
23404 rtx tmp = gen_reg_rtx (SImode);
23405 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23406 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23407 if (BYTES_BIG_ENDIAN)
23408 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23409 shift, 1, OPTAB_LIB_WIDEN);
23412 /* Mask for insertion. */
23413 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23414 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23420 /* A subroutine of the various atomic expanders. For sub-word operands,
23421 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
23424 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
/* result = (oldval & ~mask) | newval; NEWVAL is assumed to be already
   shifted into position and confined to the masked field.  */
23428 x = gen_reg_rtx (SImode);
23429 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23430 gen_rtx_NOT (SImode, mask),
23433 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23438 /* A subroutine of the various atomic expanders. For sub-word operands,
23439 extract WIDE to NARROW via SHIFT. */
23442 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
/* Shift the field down to bit 0, then truncate to NARROW's mode.  */
23444 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23445 wide, 1, OPTAB_LIB_WIDEN);
23446 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23449 /* Expand an atomic compare and swap operation. */
/* operands: 0 = bool success, 1 = output (old value), 2 = mem (elided
   assignment in this excerpt), 3 = expected, 4 = desired, 5 = is_weak,
   6 = success model, 7 = failure model.  */
23452 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23454 rtx boolval, retval, mem, oldval, newval, cond;
23455 rtx label1, label2, x, mask, shift;
23456 machine_mode mode, orig_mode;
23457 enum memmodel mod_s, mod_f;
23460 boolval = operands[0];
23461 retval = operands[1];
23463 oldval = operands[3];
23464 newval = operands[4];
23465 is_weak = (INTVAL (operands[5]) != 0);
23466 mod_s = memmodel_base (INTVAL (operands[6]));
23467 mod_f = memmodel_base (INTVAL (operands[7]));
23468 orig_mode = mode = GET_MODE (mem);
23470 mask = shift = NULL_RTX;
23471 if (mode == QImode || mode == HImode)
23473 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23474 lwarx and shift/mask operations. With power8, we need to do the
23475 comparison in SImode, but the store is still done in QI/HImode. */
23476 oldval = convert_modes (SImode, mode, oldval, 1);
23478 if (!TARGET_SYNC_HI_QI)
23480 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23482 /* Shift and mask OLDVAL into position with the word. */
23483 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23484 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23486 /* Shift and mask NEWVAL into position within the word. */
23487 newval = convert_modes (SImode, mode, newval, 1);
23488 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23489 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23492 /* Prepare to adjust the return value. */
23493 retval = gen_reg_rtx (SImode);
/* Avoid RETVAL clobbering OLDVAL/NEWVAL before they are consumed.  */
23496 else if (reg_overlap_mentioned_p (retval, oldval))
23497 oldval = copy_to_reg (oldval);
23499 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23500 oldval = copy_to_mode_reg (mode, oldval);
23502 if (reg_overlap_mentioned_p (retval, newval))
23503 newval = copy_to_reg (newval);
23505 mem = rs6000_pre_atomic_barrier (mem, mod_s);
/* Strong CAS retries via label1; weak CAS falls through on stcx.
   failure.  Label2 is the mismatch exit.  */
23510 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23511 emit_label (XEXP (label1, 0));
23513 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23515 emit_load_locked (mode, retval, mem);
/* For subword CAS, compare only the field selected by MASK.  */
23519 x = expand_simple_binop (SImode, AND, retval, mask,
23520 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23522 cond = gen_reg_rtx (CCmode);
23523 /* If we have TImode, synthesize a comparison. */
23524 if (mode != TImode)
23525 x = gen_rtx_COMPARE (CCmode, x, oldval);
/* TImode: no 128-bit compare insn, so XOR each 64-bit half against
   the expected value and OR the results; zero means equal.  */
23528 rtx xor1_result = gen_reg_rtx (DImode);
23529 rtx xor2_result = gen_reg_rtx (DImode);
23530 rtx or_result = gen_reg_rtx (DImode);
23531 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23532 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23533 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23534 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23536 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23537 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23538 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23539 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23542 emit_insn (gen_rtx_SET (cond, x))
23544 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23545 emit_unlikely_jump (x, label2);
23549 x = rs6000_mask_atomic_subword (retval, newval, mask);
23551 emit_store_conditional (orig_mode, cond, mem, x);
23555 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23556 emit_unlikely_jump (x, label1);
/* With a non-relaxed failure model the mismatch path must still pass
   through the trailing barrier; place label2 accordingly.  */
23559 if (!is_mm_relaxed (mod_f))
23560 emit_label (XEXP (label2, 0));
23562 rs6000_post_atomic_barrier (mod_s);
23564 if (is_mm_relaxed (mod_f))
23565 emit_label (XEXP (label2, 0));
23568 rs6000_finish_atomic_subword (operands[1], retval, shift);
23569 else if (mode != GET_MODE (operands[1]))
23570 convert_move (operands[1], retval, 1);
23572 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23573 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23574 emit_insn (gen_rtx_SET (boolval, x));
23577 /* Expand an atomic exchange operation. */
/* operands: 0 = output (old value), 1 = mem, 2 = new value, 3 = model
   (the mem/val assignments are elided in this excerpt -- confirm).  */
23580 rs6000_expand_atomic_exchange (rtx operands[])
23582 rtx retval, mem, val, cond;
23584 enum memmodel model;
23585 rtx label, x, mask, shift;
23587 retval = operands[0];
23590 model = memmodel_base (INTVAL (operands[3]));
23591 mode = GET_MODE (mem);
23593 mask = shift = NULL_RTX;
/* Without lbarx/lharx, widen subword exchanges to an aligned SImode
   larx/stcx loop operating on the selected field.  */
23594 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23596 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23598 /* Shift and mask VAL into position with the word. */
23599 val = convert_modes (SImode, mode, val, 1);
23600 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23601 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23603 /* Prepare to adjust the return value. */
23604 retval = gen_reg_rtx (SImode);
23608 mem = rs6000_pre_atomic_barrier (mem, model);
/* Retry loop: load-locked, splice VAL into the word, store-conditional,
   loop on failure.  */
23610 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23611 emit_label (XEXP (label, 0));
23613 emit_load_locked (mode, retval, mem);
23617 x = rs6000_mask_atomic_subword (retval, val, mask);
23619 cond = gen_reg_rtx (CCmode);
23620 emit_store_conditional (mode, cond, mem, x);
23622 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23623 emit_unlikely_jump (x, label);
23625 rs6000_post_atomic_barrier (model);
23628 rs6000_finish_atomic_subword (operands[0], retval, shift);
23631 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23632 to perform. MEM is the memory on which to operate. VAL is the second
23633 operand of the binary operator. BEFORE and AFTER are optional locations to
23634 return the value of MEM either before of after the operation. MODEL_RTX
23635 is a CONST_INT containing the memory model to use. */
23638 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23639 rtx orig_before, rtx orig_after, rtx model_rtx)
23641 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23642 machine_mode mode = GET_MODE (mem);
23643 machine_mode store_mode = mode;
23644 rtx label, x, cond, mask, shift;
23645 rtx before = orig_before, after = orig_after;
23647 mask = shift = NULL_RTX;
23648 /* On power8, we want to use SImode for the operation. On previous systems,
23649 use the operation in a subword and shift/mask to get the proper byte or
23651 if (mode == QImode || mode == HImode)
23653 if (TARGET_SYNC_HI_QI)
23655 val = convert_modes (SImode, mode, val, 1);
23657 /* Prepare to adjust the return value. */
23658 before = gen_reg_rtx (SImode);
23660 after = gen_reg_rtx (SImode);
/* No native subword larx/stcx: operate on the containing aligned word
   and confine the update to the field via MASK.  */
23665 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23667 /* Shift and mask VAL into position with the word. */
23668 val = convert_modes (SImode, mode, val, 1);
23669 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23676 /* We've already zero-extended VAL. That is sufficient to
23677 make certain that it does not affect other bits. */
23682 /* If we make certain that all of the other bits in VAL are
23683 set, that will be sufficient to not affect other bits. */
23684 x = gen_rtx_NOT (SImode, mask);
23685 x = gen_rtx_IOR (SImode, x, val);
23686 emit_insn (gen_rtx_SET (val, x));
23693 /* These will all affect bits outside the field and need
23694 adjustment via MASK within the loop. */
23698 gcc_unreachable ();
23701 /* Prepare to adjust the return value. */
23702 before = gen_reg_rtx (SImode);
23704 after = gen_reg_rtx (SImode);
23705 store_mode = mode = SImode;
23709 mem = rs6000_pre_atomic_barrier (mem, model);
/* Retry loop: load-locked BEFORE, compute AFTER = BEFORE op VAL,
   store-conditionally, branch back on reservation loss.  */
23711 label = gen_label_rtx ();
23712 emit_label (label);
23713 label = gen_rtx_LABEL_REF (VOIDmode, label);
23715 if (before == NULL_RTX)
23716 before = gen_reg_rtx (mode);
23718 emit_load_locked (mode, before, mem);
/* NOT (NAND) has no binop; expand as AND then complement.  */
23722 x = expand_simple_binop (mode, AND, before, val,
23723 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23724 after = expand_simple_unop (mode, NOT, x, after, 1);
23728 after = expand_simple_binop (mode, code, before, val,
23729 after, 1, OPTAB_LIB_WIDEN);
/* Subword path: merge the updated field back into the aligned word.  */
23735 x = expand_simple_binop (SImode, AND, after, mask,
23736 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23737 x = rs6000_mask_atomic_subword (before, x, mask);
23739 else if (store_mode != mode)
23740 x = convert_modes (store_mode, mode, x, 1);
23742 cond = gen_reg_rtx (CCmode);
23743 emit_store_conditional (store_mode, cond, mem, x);
23745 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23746 emit_unlikely_jump (x, label);
23748 rs6000_post_atomic_barrier (model);
23752 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23753 then do the calcuations in a SImode register. */
23755 rs6000_finish_atomic_subword (orig_before, before, shift);
23757 rs6000_finish_atomic_subword (orig_after, after, shift);
23759 else if (store_mode != mode)
23761 /* QImode/HImode on machines with lbarx/lharx where we do the native
23762 operation and then do the calcuations in a SImode register. */
23764 convert_move (orig_before, before, 1);
23766 convert_move (orig_after, after, 1);
23768 else if (orig_after && after != orig_after)
23769 emit_move_insn (orig_after, after);
23772 /* Emit instructions to move SRC to DST. Called by splitters for
23773 multi-register moves. It will emit at most one instruction for
23774 each register that is accessed; that is, it won't emit li/lis pairs
23775 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23779 rs6000_split_multireg_move (rtx dst, rtx src)
23781 /* The register number of the first register being moved. */
23783 /* The mode that is to be moved. */
23785 /* The mode that the move is being done in, and its size. */
23786 machine_mode reg_mode;
23788 /* The number of registers that will be moved. */
23791 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23792 mode = GET_MODE (dst);
23793 nregs = hard_regno_nregs (reg, mode);
/* Choose the widest single-register mode the hard register class holds.  */
23794 if (FP_REGNO_P (reg))
23795 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23796 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23797 else if (ALTIVEC_REGNO_P (reg))
23798 reg_mode = V16QImode;
23800 reg_mode = word_mode;
23801 reg_mode_size = GET_MODE_SIZE (reg_mode);
23803 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23805 /* TDmode residing in FP registers is special, since the ISA requires that
23806 the lower-numbered word of a register pair is always the most significant
23807 word, even in little-endian mode. This does not match the usual subreg
23808 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23809 the appropriate constituent registers "by hand" in little-endian mode.
23811 Note we do not need to check for destructive overlap here since TDmode
23812 can only reside in even/odd register pairs. */
23813 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23818 for (i = 0; i < nregs; i++)
/* For FP hard regs, index the pair from the high end (big-endian word
   order is required by the ISA even when little-endian).  */
23820 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23821 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23823 p_src = simplify_gen_subreg (reg_mode, src, mode,
23824 i * reg_mode_size);
23826 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23827 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23829 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23830 i * reg_mode_size);
23832 emit_insn (gen_rtx_SET (p_dst, p_src));
/* Reg-to-reg move where the ranges may overlap destructively: copy in
   descending register order so no source word is clobbered before use.  */
23838 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23840 /* Move register range backwards, if we might have destructive
23843 for (i = nregs - 1; i >= 0; i--)
23844 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23845 i * reg_mode_size),
23846 simplify_gen_subreg (reg_mode, src, mode,
23847 i * reg_mode_size)));
23853 bool used_update = false;
23854 rtx restore_basereg = NULL_RTX;
/* Memory operand: rewrite auto-modify or non-offsettable addresses so
   every subword can be reached as BASE + constant offset.  */
23856 if (MEM_P (src) && INT_REGNO_P (reg))
23860 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23861 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
/* Apply the pre-inc/dec adjustment up front, then address via BREG.  */
23864 breg = XEXP (XEXP (src, 0), 0);
23865 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23866 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23867 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23868 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23869 src = replace_equiv_address (src, breg);
23871 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23873 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23875 rtx basereg = XEXP (XEXP (src, 0), 0);
/* Emit the first word as a load-with-update when available.  */
23878 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23879 emit_insn (gen_rtx_SET (ndst,
23880 gen_rtx_MEM (reg_mode,
23882 used_update = true;
23885 emit_insn (gen_rtx_SET (basereg,
23886 XEXP (XEXP (src, 0), 1)));
23887 src = replace_equiv_address (src, basereg);
/* Otherwise force the whole address into a register first.  */
23891 rtx basereg = gen_rtx_REG (Pmode, reg);
23892 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23893 src = replace_equiv_address (src, basereg);
23897 breg = XEXP (src, 0);
23898 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23899 breg = XEXP (breg, 0);
23901 /* If the base register we are using to address memory is
23902 also a destination reg, then change that register last. */
23904 && REGNO (breg) >= REGNO (dst)
23905 && REGNO (breg) < REGNO (dst) + nregs)
23906 j = REGNO (breg) - REGNO (dst);
/* Mirror of the above, for a memory destination.  */
23908 else if (MEM_P (dst) && INT_REGNO_P (reg))
23912 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23913 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23916 breg = XEXP (XEXP (dst, 0), 0);
23917 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23918 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23919 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23921 /* We have to update the breg before doing the store.
23922 Use store with update, if available. */
23926 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23927 emit_insn (TARGET_32BIT
23928 ? (TARGET_POWERPC64
23929 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23930 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
23931 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23932 used_update = true;
23935 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23936 dst = replace_equiv_address (dst, breg);
23938 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23939 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23941 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23943 rtx basereg = XEXP (XEXP (dst, 0), 0);
23946 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23947 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23950 used_update = true;
23953 emit_insn (gen_rtx_SET (basereg,
23954 XEXP (XEXP (dst, 0), 1)));
23955 dst = replace_equiv_address (dst, basereg);
/* reg+reg address: fold the offset into the base for the duration of
   the move and remember the subtraction that undoes it.  */
23959 rtx basereg = XEXP (XEXP (dst, 0), 0);
23960 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23961 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23963 && REG_P (offsetreg)
23964 && REGNO (basereg) != REGNO (offsetreg));
/* r0 means "zero" as an address base; swap so the real base is used.  */
23965 if (REGNO (basereg) == 0)
23967 rtx tmp = offsetreg;
23968 offsetreg = basereg;
23971 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23972 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23973 dst = replace_equiv_address (dst, basereg);
23976 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23977 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
/* Emit one subword move per register; J rotates the order so a base
   register that is also a destination is written last.  */
23980 for (i = 0; i < nregs; i++)
23982 /* Calculate index to next subword. */
23987 /* If compiler already emitted move of first word by
23988 store with update, no need to do anything. */
23989 if (j == 0 && used_update)
23992 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23993 j * reg_mode_size),
23994 simplify_gen_subreg (reg_mode, src, mode,
23995 j * reg_mode_size)));
/* Undo the temporary base-register adjustment made for reg+reg dst.  */
23997 if (restore_basereg != NULL_RTX)
23998 emit_insn (restore_basereg);
24003 /* This page contains routines that are used to determine what the
24004 function prologue and epilogue code will do and write them out. */
24006 /* Determine whether the REG is really used. */
/* Return nonzero if hard register REG must be saved by the prologue.
   The PIC offset table register is special-cased because the prologue
   may set it up even when dataflow does not record a use.  */
24009 save_reg_p (int reg)
24011 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24013 /* When calling eh_return, we must return true for all the cases
24014 where conditional_register_usage marks the PIC offset reg
24015 call used or fixed. */
24016 if (crtl->calls_eh_return
24017 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24018 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24019 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24022 /* We need to mark the PIC offset register live for the same
24023 conditions as it is set up in rs6000_emit_prologue, or
24024 otherwise it won't be saved before we clobber it. */
24025 if (TARGET_TOC && TARGET_MINIMAL_TOC
24026 && !constant_pool_empty_p ())
24029 if (DEFAULT_ABI == ABI_V4
24030 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24031 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
24034 if (DEFAULT_ABI == ABI_DARWIN
24035 && flag_pic && crtl->uses_pic_offset_table)
/* Default rule: save iff the register is call-saved and ever live.  */
24039 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
24042 /* Return the first fixed-point register that is required to be
24043 saved. 32 if none. */
24046 first_reg_to_save (void)
24050 /* Find lowest numbered live register. */
/* GPRs below r13 are call-clobbered in all supported ABIs, so the
   scan starts at 13.  */
24051 for (first_reg = 13; first_reg <= 31; first_reg++)
24052 if (save_reg_p (first_reg))
24058 /* Similar, for FP regs. */
/* Return the first FP register (hard regno, FPRs start at 32) that must
   be saved; 64 if none.  f0..f13 are call-clobbered, so scan from f14.  */
24061 first_fp_reg_to_save (void)
24065 /* Find lowest numbered live register. */
24066 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24067 if (save_reg_p (first_reg))
24073 /* Similar, for AltiVec regs. */
/* Return the first AltiVec register that must be saved, or
   LAST_ALTIVEC_REGNO + 1 if none need saving.  */
24076 first_altivec_reg_to_save (void)
24080 /* Stack frame remains as is unless we are in AltiVec ABI. */
24081 if (! TARGET_ALTIVEC_ABI)
24082 return LAST_ALTIVEC_REGNO + 1;
24084 /* On Darwin, the unwind routines are compiled without
24085 TARGET_ALTIVEC, and use save_world to save/restore the
24086 altivec registers when necessary. */
24087 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24088 && ! TARGET_ALTIVEC)
24089 return FIRST_ALTIVEC_REGNO + 20;
24091 /* Find lowest numbered live register. */
/* Only v20..v31 are call-saved, hence the +20 start offset.  */
24092 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24093 if (save_reg_p (i))
24099 /* Return a 32-bit mask of the AltiVec registers we need to set in
24100 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
24101 the 32-bit word is 0. */
24103 static unsigned int
24104 compute_vrsave_mask (void)
24106 unsigned int i, mask = 0;
24108 /* On Darwin, the unwind routines are compiled without
24109 TARGET_ALTIVEC, and use save_world to save/restore the
24110 call-saved altivec registers when necessary. */
24111 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24112 && ! TARGET_ALTIVEC)
24115 /* First, find out if we use _any_ altivec registers. */
24116 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24117 if (df_regs_ever_live_p (i))
24118 mask |= ALTIVEC_REG_BIT (i);
24123 /* Next, remove the argument registers from the set. These must
24124 be in the VRSAVE mask set by the caller, so we don't need to add
24125 them in again. More importantly, the mask we compute here is
24126 used to generate CLOBBERs in the set_vrsave insn, and we do not
24127 wish the argument registers to die. */
24128 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24129 mask &= ~ALTIVEC_REG_BIT (i);
24131 /* Similarly, remove the return value from the set. */
/* diddle_return_value invokes is_altivec_return_reg, which sets YES when
   the function's value is returned in an AltiVec register.  */
24134 diddle_return_value (is_altivec_return_reg, &yes);
24136 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24142 /* For a very restricted set of circumstances, we can cut down the
24143 size of prologues/epilogues by calling our own save/restore-the-world
/* Decide whether the Darwin save_world/rest_world routines are usable
   for this function and record the answer in INFO->world_save_p,
   fixing up the dependent fields of INFO when they are.  */
24147 compute_save_world_info (rs6000_stack_t *info)
24149 info->world_save_p = 1;
24151 = (WORLD_SAVE_P (info)
24152 && DEFAULT_ABI == ABI_DARWIN
24153 && !cfun->has_nonlocal_label
24154 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24155 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24156 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24157 && info->cr_save_p);
24159 /* This will not work in conjunction with sibcalls. Make sure there
24160 are none. (This check is expensive, but seldom executed.) */
24161 if (WORLD_SAVE_P (info))
24164 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24165 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24167 info->world_save_p = 0;
24172 if (WORLD_SAVE_P (info))
24174 /* Even if we're not touching VRsave, make sure there's room on the
24175 stack for it, if it looks like we're calling SAVE_WORLD, which
24176 will attempt to save it. */
24177 info->vrsave_size = 4;
24179 /* If we are going to save the world, we need to save the link register too. */
24180 info->lr_save_p = 1;
24182 /* "Save" the VRsave register too if we're saving the world. */
24183 if (info->vrsave_mask == 0)
24184 info->vrsave_mask = compute_vrsave_mask ();
24186 /* Because the Darwin register save/restore routines only handle
24187 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24189 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24190 && (info->first_altivec_reg_save
24191 >= FIRST_SAVED_ALTIVEC_REGNO));
/* Callback for diddle_return_value: set *XYES (a bool) when REG is the
   AltiVec value-return register.  */
24199 is_altivec_return_reg (rtx reg, void *xyes)
24201 bool *yes = (bool *) xyes;
24202 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24207 /* Return whether REG is a global user reg or has been specified by
24208 -ffixed-REG. We should not restore these, and so cannot use
24209 lmw or out-of-line restore functions if there are any. We also
24210 can't save them (well, emit frame notes for them), because frame
24211 unwinding during exception handling will restore saved registers. */
24214 fixed_reg_p (int reg)
24216 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24217 backend sets it, overriding anything the user might have given. */
24218 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24219 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24220 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24221 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24224 return fixed_regs[reg];
24227 /* Determine the strategy for saving/restoring registers. */
/* Bit flags describing how GPRs, FPRs, and VRs are saved and restored:
   inline sequences vs. out-of-line library routines, multiple
   store/load instruction forms, and whether the out-of-line routines
   also handle the link register.  */
24230 SAVE_MULTIPLE = 0x1,
24231 SAVE_INLINE_GPRS = 0x2,
24232 SAVE_INLINE_FPRS = 0x4,
24233 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24234 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24235 SAVE_INLINE_VRS = 0x20,
24236 REST_MULTIPLE = 0x100,
24237 REST_INLINE_GPRS = 0x200,
24238 REST_INLINE_FPRS = 0x400,
24239 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24240 REST_INLINE_VRS = 0x1000
/* Compute the save/restore strategy flag mask (see the flag enum above)
   for the current function, given the frame layout in INFO and whether
   the static chain register is in use.  */
24244 rs6000_savres_strategy (rs6000_stack_t *info,
24245 bool using_static_chain_p)
24249 /* Select between in-line and out-of-line save and restore of regs.
24250 First, all the obvious cases where we don't use out-of-line. */
24251 if (crtl->calls_eh_return
24252 || cfun->machine->ra_need_lr)
24253 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24254 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24255 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24257 if (info->first_gp_reg_save == 32)
24258 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24260 if (info->first_fp_reg_save == 64)
24261 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24263 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24264 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24266 /* Define cutoff for using out-of-line functions to save registers. */
24267 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24269 if (!optimize_size)
24271 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24272 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24273 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24277 /* Prefer out-of-line restore if it will exit. */
24278 if (info->first_fp_reg_save > 61)
24279 strategy |= SAVE_INLINE_FPRS;
24280 if (info->first_gp_reg_save > 29)
24282 if (info->first_fp_reg_save == 64)
24283 strategy |= SAVE_INLINE_GPRS;
24285 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24287 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24288 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24291 else if (DEFAULT_ABI == ABI_DARWIN)
24293 if (info->first_fp_reg_save > 60)
24294 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24295 if (info->first_gp_reg_save > 29)
24296 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24297 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24301 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24302 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
24303 || info->first_fp_reg_save > 61)
24304 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24305 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24306 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24309 /* Don't bother to try to save things out-of-line if r11 is occupied
24310 by the static chain. It would require too much fiddling and the
24311 static chain is rarely used anyway. FPRs are saved w.r.t the stack
24312 pointer on Darwin, and AIX uses r1 or r12. */
24313 if (using_static_chain_p
24314 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24315 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24317 | SAVE_INLINE_VRS);
24319 /* Don't ever restore fixed regs. That means we can't use the
24320 out-of-line register restore functions if a fixed reg is in the
24321 range of regs restored. */
24322 if (!(strategy & REST_INLINE_FPRS))
24323 for (int i = info->first_fp_reg_save; i < 64; i++)
24326 strategy |= REST_INLINE_FPRS;
24330 /* We can only use the out-of-line routines to restore fprs if we've
24331 saved all the registers from first_fp_reg_save in the prologue.
24332 Otherwise, we risk loading garbage. Of course, if we have saved
24333 out-of-line then we know we haven't skipped any fprs. */
24334 if ((strategy & SAVE_INLINE_FPRS)
24335 && !(strategy & REST_INLINE_FPRS))
24336 for (int i = info->first_fp_reg_save; i < 64; i++)
24337 if (!save_reg_p (i))
24339 strategy |= REST_INLINE_FPRS;
24343 /* Similarly, for altivec regs. */
24344 if (!(strategy & REST_INLINE_VRS))
24345 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24348 strategy |= REST_INLINE_VRS;
24352 if ((strategy & SAVE_INLINE_VRS)
24353 && !(strategy & REST_INLINE_VRS))
24354 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24355 if (!save_reg_p (i))
24357 strategy |= REST_INLINE_VRS;
24361 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24362 saved is an out-of-line save or restore. Set up the value for
24363 the next test (excluding out-of-line gprs). */
24364 bool lr_save_p = (info->lr_save_p
24365 || !(strategy & SAVE_INLINE_FPRS)
24366 || !(strategy & SAVE_INLINE_VRS)
24367 || !(strategy & REST_INLINE_FPRS)
24368 || !(strategy & REST_INLINE_VRS));
/* 32-bit stmw/lmw handling: only profitable when more than one GPR
   needs saving and separate shrink-wrapping is not in effect.  */
24370 if (TARGET_MULTIPLE
24371 && !TARGET_POWERPC64
24372 && info->first_gp_reg_save < 31
24373 && !(flag_shrink_wrap
24374 && flag_shrink_wrap_separate
24375 && optimize_function_for_speed_p (cfun)))
24378 for (int i = info->first_gp_reg_save; i < 32; i++)
24379 if (save_reg_p (i))
24383 /* Don't use store multiple if only one reg needs to be
24384 saved. This can occur for example when the ABI_V4 pic reg
24385 (r30) needs to be saved to make calls, but r31 is not
24387 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24390 /* Prefer store multiple for saves over out-of-line
24391 routines, since the store-multiple instruction will
24392 always be smaller. */
24393 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24395 /* The situation is more complicated with load multiple.
24396 We'd prefer to use the out-of-line routines for restores,
24397 since the "exit" out-of-line routines can handle the
24398 restore of LR and the frame teardown. However it doesn't
24399 make sense to use the out-of-line routine if that is the
24400 only reason we'd need to save LR, and we can't use the
24401 "exit" out-of-line gpr restore if we have saved some
24402 fprs; In those cases it is advantageous to use load
24403 multiple when available. */
24404 if (info->first_fp_reg_save != 64 || !lr_save_p)
24405 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24409 /* Using the "exit" out-of-line routine does not improve code size
24410 if using it would require lr to be saved and if only saving one
24412 else if (!lr_save_p && info->first_gp_reg_save > 29)
24413 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24415 /* Don't ever restore fixed regs. */
24416 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS
24417 for (int i = info->first_gp_reg_save; i < 32; i++)
24418 if (fixed_reg_p (i))
24420 strategy |= REST_INLINE_GPRS;
24421 strategy &= ~REST_MULTIPLE;
24425 /* We can only use load multiple or the out-of-line routines to
24426 restore gprs if we've saved all the registers from
24427 first_gp_reg_save. Otherwise, we risk loading garbage.
24428 Of course, if we have saved out-of-line or used stmw then we know
24429 we haven't skipped any gprs. */
24430 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24431 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS
24432 for (int i = info->first_gp_reg_save; i < 32; i++)
24433 if (!save_reg_p (i))
24435 strategy |= REST_INLINE_GPRS;
24436 strategy &= ~REST_MULTIPLE;
/* Record whether the chosen out-of-line routines save/restore LR
   themselves, which varies per ABI/OS.  */
24440 if (TARGET_ELF && TARGET_64BIT)
24442 if (!(strategy & SAVE_INLINE_FPRS))
24443 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24444 else if (!(strategy & SAVE_INLINE_GPRS)
24445 && info->first_fp_reg_save == 64)
24446 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24448 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24449 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24451 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24452 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24457 /* Calculate the stack information for the current function. This is
24458 complicated by having two separate calling sequences, the AIX calling
24459 sequence and the V.4 calling sequence.
24461 AIX (and Darwin/Mac OS X) stack frames look like:
24463 SP----> +---------------------------------------+
24464 | back chain to caller | 0 0
24465 +---------------------------------------+
24466 | saved CR | 4 8 (8-11)
24467 +---------------------------------------+
24469 +---------------------------------------+
24470 | reserved for compilers | 12 24
24471 +---------------------------------------+
24472 | reserved for binders | 16 32
24473 +---------------------------------------+
24474 | saved TOC pointer | 20 40
24475 +---------------------------------------+
24476 | Parameter save area (+padding*) (P) | 24 48
24477 +---------------------------------------+
24478 | Alloca space (A) | 24+P etc.
24479 +---------------------------------------+
24480 | Local variable space (L) | 24+P+A
24481 +---------------------------------------+
24482 | Float/int conversion temporary (X) | 24+P+A+L
24483 +---------------------------------------+
24484 | Save area for AltiVec registers (W) | 24+P+A+L+X
24485 +---------------------------------------+
24486 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24487 +---------------------------------------+
24488 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24489 +---------------------------------------+
24491 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24492 +---------------------------------------+
24493 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24494 +---------------------------------------+
24494 old SP->| back chain to caller's caller |
24495 +---------------------------------------+
24497 * If the alloca area is present, the parameter save area is
24498 padded so that the former starts 16-byte aligned.
24500 The required alignment for AIX configurations is two words (i.e., 8
24503 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24505 SP----> +---------------------------------------+
24506 | Back chain to caller | 0
24507 +---------------------------------------+
24508 | Save area for CR | 8
24509 +---------------------------------------+
24511 +---------------------------------------+
24512 | Saved TOC pointer | 24
24513 +---------------------------------------+
24514 | Parameter save area (+padding*) (P) | 32
24515 +---------------------------------------+
24516 | Alloca space (A) | 32+P
24517 +---------------------------------------+
24518 | Local variable space (L) | 32+P+A
24519 +---------------------------------------+
24520 | Save area for AltiVec registers (W) | 32+P+A+L
24521 +---------------------------------------+
24522 | AltiVec alignment padding (Y) | 32+P+A+L+W
24523 +---------------------------------------+
24524 | Save area for GP registers (G) | 32+P+A+L+W+Y
24525 +---------------------------------------+
24526 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24527 +---------------------------------------+
24528 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24529 +---------------------------------------+
24531 * If the alloca area is present, the parameter save area is
24532 padded so that the former starts 16-byte aligned.
24534 V.4 stack frames look like:
24536 SP----> +---------------------------------------+
24537 | back chain to caller | 0
24538 +---------------------------------------+
24539 | caller's saved LR | 4
24540 +---------------------------------------+
24541 | Parameter save area (+padding*) (P) | 8
24542 +---------------------------------------+
24543 | Alloca space (A) | 8+P
24544 +---------------------------------------+
24545 | Varargs save area (V) | 8+P+A
24546 +---------------------------------------+
24547 | Local variable space (L) | 8+P+A+V
24548 +---------------------------------------+
24549 | Float/int conversion temporary (X) | 8+P+A+V+L
24550 +---------------------------------------+
24551 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24552 +---------------------------------------+
24553 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24554 +---------------------------------------+
24555 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24556 +---------------------------------------+
24557 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24558 +---------------------------------------+
24559 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24560 +---------------------------------------+
24561 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24562 +---------------------------------------+
24563 old SP->| back chain to caller's caller |
24564 +---------------------------------------+
24566 * If the alloca area is present and the required alignment is
24567 16 bytes, the parameter save area is padded so that the
24568 alloca area starts 16-byte aligned.
24570 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24571 given. (But note below and in sysv4.h that we require only 8 and
24572 may round up the size of our stack frame anyways. The historical
24573 reason is early versions of powerpc-linux which didn't properly
24574 align the stack at program startup. A happy side-effect is that
24575 -mno-eabi libraries can be used with -meabi programs.)
24577 The EABI configuration defaults to the V.4 layout. However,
24578 the stack alignment requirements may differ. If -mno-eabi is not
24579 given, the required stack alignment is 8 bytes; if -mno-eabi is
24580 given, the required alignment is 16 bytes. (But see V.4 comment
24583 #ifndef ABI_STACK_BOUNDARY
24584 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24587 static rs6000_stack_t *
24588 rs6000_stack_info (void)
24590 /* We should never be called for thunks, we are not set up for that. */
24591 gcc_assert (!cfun->is_thunk);
24593 rs6000_stack_t *info = &stack_info;
24594 int reg_size = TARGET_32BIT ? 4 : 8;
24599 HOST_WIDE_INT non_fixed_size;
24600 bool using_static_chain_p;
24602 if (reload_completed && info->reload_completed)
24605 memset (info, 0, sizeof (*info));
24606 info->reload_completed = reload_completed;
24608 /* Select which calling sequence. */
24609 info->abi = DEFAULT_ABI;
24611 /* Calculate which registers need to be saved & save area size. */
24612 info->first_gp_reg_save = first_reg_to_save ();
24613 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24614 even if it currently looks like we won't. Reload may need it to
24615 get at a constant; if so, it will have already created a constant
24616 pool entry for it. */
24617 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24618 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24619 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24620 && crtl->uses_const_pool
24621 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24622 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24624 first_gp = info->first_gp_reg_save;
24626 info->gp_size = reg_size * (32 - first_gp);
24628 info->first_fp_reg_save = first_fp_reg_to_save ();
24629 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24631 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24632 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24633 - info->first_altivec_reg_save);
24635 /* Does this function call anything? */
24636 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24638 /* Determine if we need to save the condition code registers. */
24639 if (save_reg_p (CR2_REGNO)
24640 || save_reg_p (CR3_REGNO)
24641 || save_reg_p (CR4_REGNO))
24643 info->cr_save_p = 1;
24644 if (DEFAULT_ABI == ABI_V4)
24645 info->cr_size = reg_size;
24648 /* If the current function calls __builtin_eh_return, then we need
24649 to allocate stack space for registers that will hold data for
24650 the exception handler. */
24651 if (crtl->calls_eh_return)
24654 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24657 ehrd_size = i * UNITS_PER_WORD;
24662 /* In the ELFv2 ABI, we also need to allocate space for separate
24663 CR field save areas if the function calls __builtin_eh_return. */
24664 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24666 /* This hard-codes that we have three call-saved CR fields. */
24667 ehcr_size = 3 * reg_size;
24668 /* We do *not* use the regular CR save mechanism. */
24669 info->cr_save_p = 0;
24674 /* Determine various sizes. */
24675 info->reg_size = reg_size;
24676 info->fixed_size = RS6000_SAVE_AREA;
24677 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24678 if (cfun->calls_alloca)
24680 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24681 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24683 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24684 TARGET_ALTIVEC ? 16 : 8);
24685 if (FRAME_GROWS_DOWNWARD)
24687 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24688 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24689 - (info->fixed_size + info->vars_size + info->parm_size);
24691 if (TARGET_ALTIVEC_ABI)
24692 info->vrsave_mask = compute_vrsave_mask ();
24694 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24695 info->vrsave_size = 4;
24697 compute_save_world_info (info);
24699 /* Calculate the offsets. */
24700 switch (DEFAULT_ABI)
24704 gcc_unreachable ();
24709 info->fp_save_offset = -info->fp_size;
24710 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24712 if (TARGET_ALTIVEC_ABI)
24714 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24716 /* Align stack so vector save area is on a quadword boundary.
24717 The padding goes above the vectors. */
24718 if (info->altivec_size != 0)
24719 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24721 info->altivec_save_offset = info->vrsave_save_offset
24722 - info->altivec_padding_size
24723 - info->altivec_size;
24724 gcc_assert (info->altivec_size == 0
24725 || info->altivec_save_offset % 16 == 0);
24727 /* Adjust for AltiVec case. */
24728 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24731 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24733 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24734 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24735 info->lr_save_offset = 2*reg_size;
24739 info->fp_save_offset = -info->fp_size;
24740 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24741 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24743 if (TARGET_ALTIVEC_ABI)
24745 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24747 /* Align stack so vector save area is on a quadword boundary. */
24748 if (info->altivec_size != 0)
24749 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24751 info->altivec_save_offset = info->vrsave_save_offset
24752 - info->altivec_padding_size
24753 - info->altivec_size;
24755 /* Adjust for AltiVec case. */
24756 info->ehrd_offset = info->altivec_save_offset;
24759 info->ehrd_offset = info->cr_save_offset;
24761 info->ehrd_offset -= ehrd_size;
24762 info->lr_save_offset = reg_size;
24765 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24766 info->save_size = RS6000_ALIGN (info->fp_size
24768 + info->altivec_size
24769 + info->altivec_padding_size
24773 + info->vrsave_size,
24776 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24778 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24779 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24781 /* Determine if we need to save the link register. */
24783 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24785 && !TARGET_PROFILE_KERNEL)
24786 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24787 #ifdef TARGET_RELOCATABLE
24788 || (DEFAULT_ABI == ABI_V4
24789 && (TARGET_RELOCATABLE || flag_pic > 1)
24790 && !constant_pool_empty_p ())
24792 || rs6000_ra_ever_killed ())
24793 info->lr_save_p = 1;
24795 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24796 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24797 && call_used_regs[STATIC_CHAIN_REGNUM]);
24798 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24800 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24801 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24802 || !(info->savres_strategy & SAVE_INLINE_VRS)
24803 || !(info->savres_strategy & REST_INLINE_GPRS)
24804 || !(info->savres_strategy & REST_INLINE_FPRS)
24805 || !(info->savres_strategy & REST_INLINE_VRS))
24806 info->lr_save_p = 1;
24808 if (info->lr_save_p)
24809 df_set_regs_ever_live (LR_REGNO, true);
24811 /* Determine if we need to allocate any stack frame:
24813 For AIX we need to push the stack if a frame pointer is needed
24814 (because the stack might be dynamically adjusted), if we are
24815 debugging, if we make calls, or if the sum of fp_save, gp_save,
24816 and local variables are more than the space needed to save all
24817 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24818 + 18*8 = 288 (GPR13 reserved).
24820 For V.4 we don't have the stack cushion that AIX uses, but assume
24821 that the debugger can handle stackless frames. */
24826 else if (DEFAULT_ABI == ABI_V4)
24827 info->push_p = non_fixed_size != 0;
24829 else if (frame_pointer_needed)
24832 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24836 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
/* Dump the rs6000 stack frame layout (an rs6000_stack_t) to stderr for
   debugging.  Only fields that differ from their uninteresting defaults
   are printed.
   NOTE(review): this chunk is a sparse sampling of the file -- braces,
   the return-type line and some statements are elided.  */
24842 debug_stack_info (rs6000_stack_t *info)
24844 const char *abi_string;
/* No layout supplied: compute it for the current function.  */
24847 info = rs6000_stack_info ();
24849 fprintf (stderr, "\nStack information for function %s:\n",
24850 ((current_function_decl && DECL_NAME (current_function_decl))
24851 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
/* Translate the ABI enumerator into a human-readable name.  */
24856 default: abi_string = "Unknown"; break;
24857 case ABI_NONE: abi_string = "NONE"; break;
24858 case ABI_AIX: abi_string = "AIX"; break;
24859 case ABI_ELFv2: abi_string = "ELFv2"; break;
24860 case ABI_DARWIN: abi_string = "Darwin"; break;
24861 case ABI_V4: abi_string = "V.4"; break;
24864 fprintf (stderr, "\tABI = %5s\n", abi_string);
24866 if (TARGET_ALTIVEC_ABI)
24867 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
/* 32/64 are the "nothing saved" sentinels for GP/FP first-save regs.  */
24869 if (info->first_gp_reg_save != 32)
24870 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24872 if (info->first_fp_reg_save != 64)
24873 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24875 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24876 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24877 info->first_altivec_reg_save);
24879 if (info->lr_save_p)
24880 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24882 if (info->cr_save_p)
24883 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24885 if (info->vrsave_mask)
24886 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24889 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24892 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
/* Save-area offsets are relative to the stack/frame pointer.  */
24895 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24898 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24900 if (info->altivec_size)
24901 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24902 info->altivec_save_offset);
24904 if (info->vrsave_size)
24905 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24906 info->vrsave_save_offset);
24908 if (info->lr_save_p)
24909 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24911 if (info->cr_save_p)
24912 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24914 if (info->varargs_save_offset)
24915 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24917 if (info->total_size)
24918 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24921 if (info->vars_size)
24922 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24925 if (info->parm_size)
24926 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24928 if (info->fixed_size)
24929 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24932 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24935 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24937 if (info->altivec_size)
24938 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24940 if (info->vrsave_size)
24941 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24943 if (info->altivec_padding_size)
24944 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24945 info->altivec_padding_size);
24948 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24950 if (info->save_size)
24951 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
/* reg_size of 4 (32-bit) is the default, so only print when 64-bit.  */
24953 if (info->reg_size != 4)
24954 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24956 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24958 fprintf (stderr, "\n");
/* Implement RETURN_ADDR_RTX: return an rtx for the return address of
   the frame COUNT levels up from the current one.  For COUNT != 0 (or
   when the prologue trashes LR) the address is loaded from the LR save
   slot of the previous frame via the back chain; otherwise LR's
   incoming value is used directly.
   NOTE(review): sparse sampling -- the condition guarding the first
   branch and some braces are elided.  */
24962 rs6000_return_addr (int count, rtx frame)
24964 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
24965 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
24967 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
/* Force the full frame to be laid out so the walk below is valid.  */
24969 cfun->machine->ra_needs_full_frame = 1;
24972 /* FRAME is set to frame_pointer_rtx by the generic code, but that
24973 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
24974 frame = stack_pointer_rtx;
/* Follow the back chain, then read LR from its ABI-fixed offset.  */
24975 rtx prev_frame_addr = memory_address (Pmode, frame);
24976 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
24977 rtx lr_save_off = plus_constant (Pmode,
24978 prev_frame, RETURN_ADDRESS_OFFSET);
24979 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
24980 return gen_rtx_MEM (Pmode, lr_save_addr);
/* Simple case: record that LR is needed and use its initial value.  */
24983 cfun->machine->ra_need_lr = 1;
24984 return get_hard_reg_initial_val (Pmode, LR_REGNO);
24987 /* Say whether a function is a candidate for sibcall handling or not. */
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  DECL is the callee
   declaration (may be NULL for indirect calls), EXP the CALL_EXPR.
   Rejects sibcalls that would clobber the static chain, spill extra
   vector args with nowhere to put VRsave code, or cross a TOC/PLT
   boundary the epilogue cannot restore.
   NOTE(review): sparse sampling -- several return statements and braces
   are elided from this view.  */
24990 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24994 /* The sibcall epilogue may clobber the static chain register.
24995 ??? We could work harder and avoid that, but it's probably
24996 not worth the hassle in practice. */
24997 if (CALL_EXPR_STATIC_CHAIN (exp))
/* Prefer the declared type; fall back to the call expression's type
   for indirect calls.  */
25001 fntype = TREE_TYPE (decl);
25003 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25005 /* We can't do it if the called function has more vector parameters
25006 than the current function; there's nowhere to put the VRsave code. */
25007 if (TARGET_ALTIVEC_ABI
25008 && TARGET_ALTIVEC_VRSAVE
25009 && !(decl && decl == current_function_decl))
25011 function_args_iterator args_iter;
25015 /* Functions with vector parameters are required to have a
25016 prototype, so the argument type info must be available
/* Count vector args of the callee, then of the caller, and compare.  */
25018 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25019 if (TREE_CODE (type) == VECTOR_TYPE
25020 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25023 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25024 if (TREE_CODE (type) == VECTOR_TYPE
25025 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25032 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25033 functions, because the callee may have a different TOC pointer to
25034 the caller and there's no way to ensure we restore the TOC when
25035 we return. With the secure-plt SYSV ABI we can't make non-local
25036 calls when -fpic/PIC because the plt call stubs use r30. */
25037 if (DEFAULT_ABI == ABI_DARWIN
25038 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25040 && !DECL_EXTERNAL (decl)
25041 && !DECL_WEAK (decl)
25042 && (*targetm.binds_local_p) (decl))
25043 || (DEFAULT_ABI == ABI_V4
25044 && (!TARGET_SECURE_PLT
25047 && (*targetm.binds_local_p) (decl)))))
/* Even a local call is rejected if forced through a long call.  */
25049 tree attr_list = TYPE_ATTRIBUTES (fntype);
25051 if (!lookup_attribute ("longcall", attr_list)
25052 || lookup_attribute ("shortcall", attr_list))
/* Return nonzero if the link register (LR) is ever clobbered by the
   current function outside of the prologue/epilogue, i.e. whether it
   must be saved.  Walks the whole insn chain, ignoring sibcalls and
   prologue/epilogue stores that do not really count as kills.
   NOTE(review): sparse sampling -- the return-type line, local
   declarations and some returns are elided.  */
25060 rs6000_ra_ever_killed (void)
/* Thunks jump through LR deliberately; no save needed.  */
25066 if (cfun->is_thunk)
/* lr_save_state caches the answer: 0 = unknown, else value+1.  */
25069 if (cfun->machine->lr_save_state)
25070 return cfun->machine->lr_save_state - 1;
25072 /* regs_ever_live has LR marked as used if any sibcalls are present,
25073 but this should not force saving and restoring in the
25074 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25075 clobbers LR, so that is inappropriate. */
25077 /* Also, the prologue can generate a store into LR that
25078 doesn't really count, like this:
25081 bcl to set PIC register
25085 When we're called from the epilogue, we need to avoid counting
25086 this as a store. */
25088 push_topmost_sequence ();
25089 top = get_insns ();
25090 pop_topmost_sequence ();
25091 reg = gen_rtx_REG (Pmode, LR_REGNO);
25093 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25099 if (!SIBLING_CALL_P (insn))
25102 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25104 else if (set_of (reg, insn) != NULL_RTX
25105 && !prologue_epilogue_contains (insn))
25112 /* Emit instructions to restore the link register after determining where
25113 its value has been stored. */
/* NOTE(review): the comment above appears displaced by the sparse
   sampling -- the code below actually loads the TOC register, matching
   the comment that follows.  */
25114 a constant pool; or for SVR4 -fpic. */
/* Emit the insns that load the TOC/GOT base register for the current
   ABI and PIC mode.  FROMPROLOG is nonzero when called while emitting
   the prologue (affects whether hard reg 0 or a pseudo is used).  */
25117 rs6000_emit_load_toc_table (int fromprolog)
25120 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
/* Secure-PLT SYSV: compute the GOT address from a bcl-set label.  */
25122 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25125 rtx lab, tmp1, tmp2, got;
25127 lab = gen_label_rtx ();
25128 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25129 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25132 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25136 got = rs6000_got_sym ();
25137 tmp1 = tmp2 = dest;
25140 tmp1 = gen_reg_rtx (Pmode);
25141 tmp2 = gen_reg_rtx (Pmode);
25143 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25144 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25145 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25146 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
/* SVR4 -fpic (small PIC): single load_toc pattern via LR.  */
25148 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25150 emit_insn (gen_load_toc_v4_pic_si ());
25151 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
/* SVR4 -fPIC (large PIC): compute TOC base from LCF/LCL labels.  */
25153 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25156 rtx temp0 = (fromprolog
25157 ? gen_rtx_REG (Pmode, 0)
25158 : gen_reg_rtx (Pmode));
25164 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25165 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25167 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25168 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25170 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25171 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25172 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF))25178 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25180 lab = gen_label_rtx ();
25181 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25182 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
/* With a link stack, the bcl/mflr trick needs a +4 adjustment.  */
25183 if (TARGET_LINK_STACK)
25184 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25185 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25187 emit_insn (gen_addsi3 (dest, temp0, dest));
25189 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25191 /* This is for AIX code running in non-PIC ELF32. */
25192 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25195 emit_insn (gen_elf_high (dest, realsym));
25196 emit_insn (gen_elf_low (dest, dest, realsym));
/* AIX/ELFv2: reload the TOC pointer from its dedicated slot.  */
25200 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25203 emit_insn (gen_load_toc_aix_si (dest));
25205 emit_insn (gen_load_toc_aix_di (dest));
25209 /* Emit instructions to restore the link register after determining where
25210 its value has been stored. */
/* Store SOURCE (the EH return address) either into the LR save slot on
   the stack (using SCRATCH to form the address when the frame is large
   or dynamic) or directly into LR when no LR save slot exists.  */
25213 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25215 rs6000_stack_t *info = rs6000_stack_info ();
25218 operands[0] = source;
25219 operands[1] = scratch;
25221 if (info->lr_save_p)
25223 rtx frame_rtx = stack_pointer_rtx;
25224 HOST_WIDE_INT sp_offset = 0;
/* With a dynamic or >32767-byte frame, reload the backchain to get a
   base register in range of the LR save slot.  */
25227 if (frame_pointer_needed
25228 || cfun->calls_alloca
25229 || info->total_size > 32767)
25231 tmp = gen_frame_mem (Pmode, frame_rtx);
25232 emit_move_insn (operands[1], tmp);
25233 frame_rtx = operands[1];
25235 else if (info->push_p)
25236 sp_offset = info->total_size;
25238 tmp = plus_constant (Pmode, frame_rtx,
25239 info->lr_save_offset + sp_offset);
25240 tmp = gen_frame_mem (Pmode, tmp);
25241 emit_move_insn (tmp, operands[0]);
25244 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25246 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25247 state of lr_save_p so any change from here on would be a bug. In
25248 particular, stop rs6000_ra_ever_killed from considering the SET
25249 of lr we may have added just above. */
25250 cfun->machine->lr_save_state = info->lr_save_p + 1;
/* Lazily-created alias set used for all TOC references; -1 means not
   yet allocated.  */
25253 static GTY(()) alias_set_type set = -1;
/* Return the alias set for TOC memory, creating it on first use.  */
25256 get_TOC_alias_set (void)
25259 set = new_alias_set ();
25263 /* This returns nonzero if the current function uses the TOC. This is
25264 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25265 is generated by the ABI_V4 load_toc_* patterns.
25266 Return 2 instead of 1 if the load_toc_* pattern is in the function
25267 partition that doesn't start the function. */
/* NOTE(review): the function signature line (presumably a static int
   taking no arguments) is elided by the sparse sampling; the loop below
   is its body.  */
25275 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25279 rtx pat = PATTERN (insn);
/* Scan PARALLELs for a (use (unspec ... UNSPEC_TOC)) element.  */
25282 if (GET_CODE (pat) == PARALLEL)
25283 for (i = 0; i < XVECLEN (pat, 0); i++)
25285 rtx sub = XVECEXP (pat, 0, i);
25286 if (GET_CODE (sub) == USE)
25288 sub = XEXP (sub, 0);
25289 if (GET_CODE (sub) == UNSPEC
25290 && XINT (sub, 1) == UNSPEC_TOC)
/* Track crossing into the second text partition (return 2 case).  */
25295 else if (crtl->has_bb_partition
25297 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
/* Build an rtx that references SYMBOL through the TOC.  For small-model
   code this is a simple UNSPEC_TOCREL; for large code models the HIGH
   part may be moved into LARGETOC_REG (if supplied) and a LO_SUM
   returned.  */
25305 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25307 rtx tocrel, tocreg, hi;
25309 if (TARGET_DEBUG_ADDR)
25311 if (SYMBOL_REF_P (symbol))
25312 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25316 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25317 GET_RTX_NAME (GET_CODE (symbol)));
25318 debug_rtx (symbol);
/* After reload we must mark the TOC register live by hand.  */
25322 if (!can_create_pseudo_p ())
25323 df_set_regs_ever_live (TOC_REGISTER, true);
25325 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25326 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25327 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
/* Large model: split into HIGH + LO_SUM, optionally materializing the
   HIGH part in LARGETOC_REG.  */
25330 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25331 if (largetoc_reg != NULL)
25333 emit_move_insn (largetoc_reg, hi);
25336 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
25339 /* Issue assembly directives that create a reference to the given DWARF
25340 FRAME_TABLE_LABEL from the current function section. */
/* Emits an AIX ".ref" directive so the frame table is kept by the
   linker; the label name is stripped of any target encoding first.  */
25342 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25344 fprintf (asm_out_file, "\t.ref %s\n",
25345 (* targetm.strip_name_encoding) (frame_table_label));
25348 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25349 and the change to the stack pointer. */
/* Emit a stack_tie insn: a PARALLEL of dummy (set (mem) 0) for the
   stack pointer, the hard frame pointer (when HARD_FRAME_NEEDED) and
   FP, preventing the scheduler from moving frame accesses across
   stack-pointer updates.  */
25352 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25359 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25360 if (hard_frame_needed)
25361 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
/* Include FP only if it is not already one of the two above.  */
25362 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25363 || (hard_frame_needed
25364 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25367 p = rtvec_alloc (i);
25370 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25371 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25374 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25377 /* Allocate SIZE_INT bytes on the stack using a store with update style insn
25378 and set the appropriate attributes for the generated insn. Return the
25379 first insn which adjusts the stack pointer or the last insn before
25380 the stack adjustment loop.
25382 SIZE_INT is used to create the CFI note for the allocation.
25384 SIZE_RTX is an rtx containing the size of the adjustment. Note that
25385 since stacks grow to lower addresses its runtime value is -SIZE_INT.
25387 ORIG_SP contains the backchain value that must be stored at *sp. */
25390 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
25394 rtx size_rtx = GEN_INT (-size_int);
/* A decrement larger than 16 bits must go through a scratch reg.  */
25395 if (size_int > 32767)
25397 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25398 /* Need a note here so that try_split doesn't get confused. */
25399 if (get_last_insn () == NULL_RTX)
25400 emit_note (NOTE_INSN_DELETED);
25401 insn = emit_move_insn (tmp_reg, size_rtx);
25402 try_split (PATTERN (insn), insn, 0);
25403 size_rtx = tmp_reg;
/* stwu/stdu: decrement SP and store the backchain in one insn.  */
25406 if (Pmode == SImode)
25407 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
25412 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
/* Mark the backchain store MEM as frame memory that cannot trap.  */
25416 rtx par = PATTERN (insn);
25417 gcc_assert (GET_CODE (par) == PARALLEL);
25418 rtx set = XVECEXP (par, 0, 0);
25419 gcc_assert (GET_CODE (set) == SET);
25420 rtx mem = SET_DEST (set);
25421 gcc_assert (MEM_P (mem));
25422 MEM_NOTRAP_P (mem) = 1;
25423 set_mem_alias_set (mem, get_frame_alias_set ());
/* Describe the SP adjustment to the CFI machinery.  */
25425 RTX_FRAME_RELATED_P (insn) = 1;
25426 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25427 gen_rtx_SET (stack_pointer_rtx,
25428 gen_rtx_PLUS (Pmode,
25430 GEN_INT (-size_int))));
25432 /* Emit a blockage to ensure the allocation/probing insns are
25433 not optimized, combined, removed, etc. Add REG_STACK_CHECK
25434 note for similar reasons. */
25435 if (flag_stack_clash_protection)
25437 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
25438 emit_insn (gen_blockage ());
/* Return the stack-clash probe interval in bytes, as configured by the
   --param stack-clash-protection-probe-interval (a power-of-two
   exponent).  */
25444 static HOST_WIDE_INT
25445 get_stack_clash_protection_probe_interval (void)
25447 return (HOST_WIDE_INT_1U
25448 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
/* Return the stack-clash guard size in bytes, as configured by the
   --param stack-clash-protection-guard-size (a power-of-two
   exponent).  */
25451 static HOST_WIDE_INT
25452 get_stack_clash_protection_guard_size (void)
25454 return (HOST_WIDE_INT_1U
25455 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
25458 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
25459 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
25461 COPY_REG, if non-null, should contain a copy of the original
25462 stack pointer at exit from this function.
25464 This is subtly different than the Ada probing in that it tries hard to
25465 prevent attacks that jump the stack guard. Thus it is never allowed to
25466 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
25467 space without a suitable probe. */
/* Returns the first insn adjusting the stack pointer (for CFI
   purposes), or NULL.
   NOTE(review): sparse sampling -- the signature continuation, some
   braces and a few statements are elided from this view.  */
25469 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
25472 rtx orig_sp = copy_reg;
25474 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25476 /* Round the size down to a multiple of PROBE_INTERVAL. */
25477 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
25479 /* If explicitly requested,
25480 or the rounded size is not the same as the original size
25481 or the the rounded size is greater than a page,
25482 then we will need a copy of the original stack pointer. */
25483 if (rounded_size != orig_size
25484 || rounded_size > probe_interval
25487 /* If the caller did not request a copy of the incoming stack
25488 pointer, then we use r0 to hold the copy. */
25490 orig_sp = gen_rtx_REG (Pmode, 0)25491 emit_move_insn (orig_sp, stack_pointer_rtx);
25494 /* There's three cases here.
25496 One is a single probe which is the most common and most efficiently
25497 implemented as it does not have to have a copy of the original
25498 stack pointer if there are no residuals.
25500 Second is unrolled allocation/probes which we use if there's just
25501 a few of them. It needs to save the original stack pointer into a
25502 temporary for use as a source register in the allocation/probe.
25504 Last is a loop. This is the most uncommon case and least efficient. */
25505 rtx_insn *retval = NULL;
/* Case 1: exactly one probe interval -- single stdu/stwu.  */
25506 if (rounded_size == probe_interval)
25508 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
25510 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
/* Case 2: up to 8 intervals -- unroll the allocation/probe insns.  */
25512 else if (rounded_size <= 8 * probe_interval)
25514 /* The ABI requires using the store with update insns to allocate
25515 space and store the backchain into the stack
25517 So we save the current stack pointer into a temporary, then
25518 emit the store-with-update insns to store the saved stack pointer
25519 into the right location in each new page. */
25520 for (int i = 0; i < rounded_size; i += probe_interval)
25523 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
25525 /* Save the first stack adjustment in RETVAL. */
25530 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
/* Case 3: many intervals -- emit a runtime probing loop.  */
25534 /* Compute the ending address. */
25536 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
25537 rtx rs = GEN_INT (-rounded_size);
25539 if (add_operand (rs, Pmode))
25540 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
25543 emit_move_insn (end_addr, GEN_INT (-rounded_size));
25544 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
25545 stack_pointer_rtx));
25546 /* Describe the effect of INSN to the CFI engine. */
25547 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25548 gen_rtx_SET (end_addr,
25549 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25552 RTX_FRAME_RELATED_P (insn) = 1;
25554 /* Emit the loop. */
25556 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
25557 stack_pointer_rtx, orig_sp,
25560 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
25561 stack_pointer_rtx, orig_sp,
25563 RTX_FRAME_RELATED_P (retval) = 1;
25564 /* Describe the effect of INSN to the CFI engine. */
25565 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
25566 gen_rtx_SET (stack_pointer_rtx, end_addr));
25568 /* Emit a blockage to ensure the allocation/probing insns are
25569 not optimized, combined, removed, etc. Other cases handle this
25570 within their call to rs6000_emit_allocate_stack_1. */
25571 emit_insn (gen_blockage ());
25573 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
/* Handle any remainder smaller than a full probe interval.  */
25576 if (orig_size != rounded_size)
25578 /* Allocate (and implicitly probe) any residual space. */
25579 HOST_WIDE_INT residual = orig_size - rounded_size;
25581 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
25583 /* If the residual was the only allocation, then we can return the
25584 allocating insn. */
25592 /* Emit the correct code for allocating stack space, as insns.
25593 If COPY_REG, make sure a copy of the old frame is left there.
25594 The generated code may use hard register 0 as a temporary. */
/* SIZE is the number of bytes to allocate; COPY_OFF, when nonzero, is
   added to the saved stack-pointer copy placed in COPY_REG.  Also
   handles -fstack-limit checking and stack-clash protection.
   NOTE(review): sparse sampling -- some returns and braces are elided
   from this view.  */
25597 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25600 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25601 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25602 rtx todec = gen_int_mode (-size, Pmode);
/* If -size does not survive the round-trip through Pmode, the frame
   is too large to represent -- emit a trap instead of wrong code.  */
25604 if (INTVAL (todec) != -size)
25606 warning (0, "stack frame too large");
25607 emit_insn (gen_trap ());
/* -fstack-limit: trap at runtime if SP would go below the limit.  */
25611 if (crtl->limit_stack)
25613 if (REG_P (stack_limit_rtx)
25614 && REGNO (stack_limit_rtx) > 1
25615 && REGNO (stack_limit_rtx) <= 31)
25618 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25621 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25623 else if (SYMBOL_REF_P (stack_limit_rtx)
25625 && DEFAULT_ABI == ABI_V4
/* Symbolic limit: materialize symbol+size via elf_high/elf_low.  */
25628 rtx toload = gen_rtx_CONST (VOIDmode,
25629 gen_rtx_PLUS (Pmode,
25633 emit_insn (gen_elf_high (tmp_reg, toload));
25634 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25635 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25639 warning (0, "stack limit expression is not supported");
/* Stack-clash protection takes over the whole allocation.  */
25642 if (flag_stack_clash_protection)
25644 if (size < get_stack_clash_protection_guard_size ())
25645 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25648 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25651 /* If we asked for a copy with an offset, then we still need add in
25653 if (copy_reg && copy_off)
25654 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
/* Ordinary path: save SP copy (plus optional offset), then allocate.  */
25662 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25664 emit_move_insn (copy_reg, stack_reg);
25667 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25668 it now and set the alias set/attributes. The above gen_*_update
25669 calls will generate a PARALLEL with the MEM set being the first
25671 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25675 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25677 #if PROBE_INTERVAL > 32768
25678 #error Cannot use indexed addressing mode for stack probing
25681 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25682 inclusive. These are offsets from the current stack pointer. */
/* Used for -fstack-check (the "Ada-style" probing): small constant
   ranges are probed with individual stores, larger ones with a loop
   using r12 (test address) and r0 (last address).  */
25685 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25687 /* See if we have a constant small number of probes to generate. If so,
25688 that's the easy case. */
25689 if (first + size <= 32768)
25693 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25694 it exceeds SIZE. If only one probe is needed, this will not
25695 generate any code. Then probe at FIRST + SIZE. */
25696 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25697 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25700 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25704 /* Otherwise, do the same as above, but in a loop. Note that we must be
25705 extra careful with variables wrapping around because we might be at
25706 the very top (or the very bottom) of the address space and we have
25707 to be able to handle this case properly; in particular, we use an
25708 equality test for the loop condition. */
25711 HOST_WIDE_INT rounded_size;
25712 rtx r12 = gen_rtx_REG (Pmode, 12);
25713 rtx r0 = gen_rtx_REG (Pmode, 0);
25715 /* Sanity check for the addressing mode we're going to use. */
25716 gcc_assert (first <= 32768);
25718 /* Step 1: round SIZE to the previous multiple of the interval. */
25720 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25723 /* Step 2: compute initial and final value of the loop counter. */
25725 /* TEST_ADDR = SP + FIRST. */
25726 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25729 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25730 if (rounded_size > 32768)
25732 emit_move_insn (r0, GEN_INT (-rounded_size));
25733 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25736 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25740 /* Step 3: the loop
25744 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25747 while (TEST_ADDR != LAST_ADDR)
25749 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25750 until it is equal to ROUNDED_SIZE. */
25753 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25755 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25758 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25759 that SIZE is equal to ROUNDED_SIZE. */
25761 if (size != rounded_size)
25762 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
25766 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25767 addresses, not offsets. */
/* Emits the assembly for the -fstack-check probing loop: decrement
   REG1 by PROBE_INTERVAL, store a probe word, loop until REG1 == REG2.
   Returns the (empty) template string expected of an output function.  */
25769 static const char *
25770 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25772 static int labelno = 0;
25776 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25779 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25781 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25783 xops[1] = GEN_INT (-PROBE_INTERVAL);
25784 output_asm_insn ("addi %0,%0,%1", xops);
25786 /* Probe at TEST_ADDR. */
25787 xops[1] = gen_rtx_REG (Pmode, 0);
25788 output_asm_insn ("stw %1,0(%0)", xops);
25790 /* Test if TEST_ADDR == LAST_ADDR. */
/* cmpd for 64-bit, cmpw for 32-bit (guard line elided in this view).  */
25793 output_asm_insn ("cmpd 0,%0,%1", xops);
25795 output_asm_insn ("cmpw 0,%0,%1", xops);
25798 fputs ("\tbne 0,", asm_out_file);
25799 assemble_name_raw (asm_out_file, loop_lab);
25800 fputc ('\n', asm_out_file);
25805 /* This function is called when rs6000_frame_related is processing
25806 SETs within a PARALLEL, and returns whether the REGNO save ought to
25807 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25808 for out-of-line register save functions, store multiple, and the
25809 Darwin world_save. They may contain registers that don't really
/* Returns true when a save of REGNO must be described to the CFI
   engine; false for registers the unwinder must not restore.  */
25813 interesting_frame_related_regno (unsigned int regno)
25815 /* Saves apparently of r0 are actually saving LR. It doesn't make
25816 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25817 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25818 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25819 as frame related. */
25822 /* If we see CR2 then we are here on a Darwin world save. Saves of
25823 CR2 signify the whole CR is being saved. This is a long-standing
25824 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
25825 that CR needs to be saved. */
25826 if (regno == CR2_REGNO)
25828 /* Omit frame info for any user-defined global regs. If frame info
25829 is supplied for them, frame unwinding will restore a user reg.
25830 Also omit frame info for any reg we don't need to save, as that
25831 bloats frame info and can cause problems with shrink wrapping.
25832 Since global regs won't be seen as needing to be saved, both of
25833 these conditions are covered by save_reg_p. */
25834 return save_reg_p (regno);
25837 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25838 addresses, not offsets.
25840 REG2 contains the backchain that must be stored into *sp at each allocation.
25842 This is subtly different than the Ada probing above in that it tries hard
25843 to prevent attacks that jump the stack guard. Thus, it is never allowed
25844 to allocate more than PROBE_INTERVAL bytes of stack space without a
/* Emits the stack-clash probing loop: stdu/stwu both allocates a page
   and stores the backchain, so allocation and probe are one insn.  */
25847 static const char *
25848 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25850 static int labelno = 0;
25854 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25856 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25858 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25860 /* This allocates and probes. */
25863 xops[2] = GEN_INT (-probe_interval);
/* stdu for 64-bit, stwu for 32-bit (guard line elided in this view).  */
25865 output_asm_insn ("stdu %1,%2(%0)", xops);
25867 output_asm_insn ("stwu %1,%2(%0)", xops);
25869 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25873 output_asm_insn ("cmpd 0,%0,%1", xops);
25875 output_asm_insn ("cmpw 0,%0,%1", xops);
25877 fputs ("\tbne 0,", asm_out_file);
25878 assemble_name_raw (asm_out_file, loop_lab);
25879 fputc ('\n', asm_out_file);
25884 /* Wrapper around the output_probe_stack_range routines. */
/* Dispatch to the stack-clash-protection variant when that flag is in
   effect; otherwise use the ordinary probe loop, which does not need the
   backchain register REG2.  */
25886 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
25888 if (flag_stack_clash_protection)
25889 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
25891 return output_probe_stack_range_1 (reg1, reg3);
25894 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25895 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25896 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25897 deduce these equivalences by itself so it wasn't necessary to hold
25898 its hand so much. Don't be tempted to always supply d2_f_d_e with
25899 the actual cfa register, ie. r31 when we are using a hard frame
25900 pointer. That fails when saving regs off r1, and sched moves the
25901 r31 setup past the reg saves. */
25904 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25905 rtx reg2, rtx repl2)
/* NOTE(review): the declaration of the local `repl` and some surrounding
   braces are on lines not visible in this excerpt.  */
25909 if (REGNO (reg) == STACK_POINTER_REGNUM)
/* REG is already the CFA register, so no replacement expression is
   needed; VAL must be zero in that case.  */
25911 gcc_checking_assert (val == 0);
/* Otherwise express REG as sp + VAL for the unwinder's benefit.  */
25915 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25918 rtx pat = PATTERN (insn);
25919 if (!repl && !reg2)
25921 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25922 if (GET_CODE (pat) == PARALLEL)
25923 for (int i = 0; i < XVECLEN (pat, 0); i++)
25924 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25926 rtx set = XVECEXP (pat, 0, i);
/* Mark a SET frame-related unless it copies from an uninteresting
   register (see interesting_frame_related_regno).  */
25928 if (!REG_P (SET_SRC (set))
25929 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25930 RTX_FRAME_RELATED_P (set) = 1;
25932 RTX_FRAME_RELATED_P (insn) = 1;
25936 /* We expect that 'pat' is either a SET or a PARALLEL containing
25937 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25938 are important so they all have to be marked RTX_FRAME_RELATED_P.
25939 Call simplify_replace_rtx on the SETs rather than the whole insn
25940 so as to leave the other stuff alone (for example USE of r12). */
25942 set_used_flags (pat);
25943 if (GET_CODE (pat) == SET)
25946 pat = simplify_replace_rtx (pat, reg, repl)
25948 pat = simplify_replace_rtx (pat, reg2, repl2);
25950 else if (GET_CODE (pat) == PARALLEL)
/* Copy the PARALLEL shell so the replacement does not clobber the
   insn's own pattern.  */
25952 pat = shallow_copy_rtx (pat);
25953 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25955 for (int i = 0; i < XVECLEN (pat, 0); i++)
25956 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25958 rtx set = XVECEXP (pat, 0, i);
25961 set = simplify_replace_rtx (set, reg, repl);
25963 set = simplify_replace_rtx (set, reg2, repl2);
25964 XVECEXP (pat, 0, i) = set;
25966 if (!REG_P (SET_SRC (set))
25967 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25968 RTX_FRAME_RELATED_P (set) = 1;
25972 gcc_unreachable ();
/* Attach the rewritten pattern so dwarf2cfi sees the CFA-relative form.  */
25974 RTX_FRAME_RELATED_P (insn) = 1;
25975 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25980 /* Returns an insn that has a vrsave set operation with the
25981 appropriate CLOBBERs. */
25984 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
/* NOTE(review): the declarations of `i` and `nclobs`, and the brace
   structure of the loop below, are on lines not visible in this
   excerpt.  */
25987 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25988 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
/* The first element is the VRSAVE update itself, done as an
   unspec_volatile so it cannot be moved or deleted.  */
25991 = gen_rtx_SET (vrsave,
25992 gen_rtx_UNSPEC_VOLATILE (SImode,
25993 gen_rtvec (2, reg, vrsave),
25994 UNSPECV_SET_VRSAVE));
25998 /* We need to clobber the registers in the mask so the scheduler
25999 does not move sets to VRSAVE before sets of AltiVec registers.
26001 However, if the function receives nonlocal gotos, reload will set
26002 all call saved registers live. We will end up with:
26004 (set (reg 999) (mem))
26005 (parallel [ (set (reg vrsave) (unspec blah))
26006 (clobber (reg 999))])
26008 The clobber will cause the store into reg 999 to be dead, and
26009 flow will attempt to delete an epilogue insn. In this case, we
26010 need an unspec use/set of the register. */
26012 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26013 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
/* In a prologue, or for call-used regs, a plain clobber suffices.  */
26015 if (!epiloguep || call_used_regs [i])
26016 clobs[nclobs++] = gen_hard_reg_clobber (V4SImode, i);
/* Epilogue + call-saved reg: use a self-referential unspec SET instead
   of a clobber, per the comment above.  */
26019 rtx reg = gen_rtx_REG (V4SImode, i);
26022 = gen_rtx_SET (reg,
26023 gen_rtx_UNSPEC (V4SImode,
26024 gen_rtvec (1, reg), 27));
/* Wrap everything in one PARALLEL insn.  */
26028 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26030 for (i = 0; i < nclobs; ++i)
26031 XVECEXP (insn, 0, i) = clobs[i];
26037 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
/* Build a SET moving REG to or from the frame slot at
   [FRAME_REG + OFFSET]; STORE selects the direction
   (true: reg -> memory, false: memory -> reg).  */
26041 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26042 mem = gen_frame_mem (GET_MODE (reg), addr);
26043 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26047 gen_frame_load (rtx reg, rtx frame_reg, int offset)
/* Convenience wrapper: load REG from [FRAME_REG + OFFSET].  */
26049 return gen_frame_set (reg, frame_reg, offset, false);
26053 gen_frame_store (rtx reg, rtx frame_reg, int offset)
/* Convenience wrapper: store REG into [FRAME_REG + OFFSET].  */
26055 return gen_frame_set (reg, frame_reg, offset, true);
26058 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26059 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26062 emit_frame_save (rtx frame_reg, machine_mode mode,
26063 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
/* NOTE(review): the declaration of the local `reg` is on a line not
   visible in this excerpt.  */
26067 /* Some cases that need register indexed addressing. */
26068 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26069 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
26071 reg = gen_rtx_REG (mode, regno);
26072 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
/* FRAME_REG_TO_SP relates FRAME_REG to the stack pointer so
   rs6000_frame_related can express the save in CFA terms.  */
26073 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26074 NULL_RTX, NULL_RTX);
26077 /* Emit an offset memory reference suitable for a frame store, while
26078 converting to a valid addressing mode. */
26081 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26083 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
26086 #ifndef TARGET_FIX_AND_CONTINUE
26087 #define TARGET_FIX_AND_CONTINUE 0
26090 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
26091 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26092 #define LAST_SAVRES_REGISTER 31
26093 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26104 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26106 /* Temporary holding space for an out-of-line register save/restore
26108 static char savres_routine_name[30];
26110 /* Return the name for an out-of-line register save/restore routine.
26111 We are saving/restoring GPRs if GPR is true. */
/* NOTE(review): SEL is a bitmask combining SAVRES_SAVE, SAVRES_LR and a
   SAVRES_REG field (GPR/FPR/VR); REGNO is the first register handled by
   the routine.  The result is returned in the static buffer
   savres_routine_name, so it is only valid until the next call.  */
26114 rs6000_savres_routine_name (int regno, int sel)
26116 const char *prefix = "";
26117 const char *suffix = "";
26119 /* Different targets are supposed to define
26120 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26121 routine name could be defined with:
26123 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26125 This is a nice idea in practice, but in reality, things are
26126 complicated in several ways:
26128 - ELF targets have save/restore routines for GPRs.
26130 - PPC64 ELF targets have routines for save/restore of GPRs that
26131 differ in what they do with the link register, so having a set
26132 prefix doesn't work. (We only use one of the save routines at
26133 the moment, though.)
26135 - PPC32 elf targets have "exit" versions of the restore routines
26136 that restore the link register and can save some extra space.
26137 These require an extra suffix. (There are also "tail" versions
26138 of the restore routines and "GOT" versions of the save routines,
26139 but we don't generate those at present. Same problems apply,
26142 We deal with all this by synthesizing our own prefix/suffix and
26143 using that for the simple sprintf call shown above. */
26144 if (DEFAULT_ABI == ABI_V4)
/* NOTE(review): a conditional (apparently selecting the "_x" exit
   suffix when SAVRES_LR is set) falls on lines not visible in this
   excerpt.  */
26149 if ((sel & SAVRES_REG) == SAVRES_GPR)
26150 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26151 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26152 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26153 else if ((sel & SAVRES_REG) == SAVRES_VR)
26154 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26158 if ((sel & SAVRES_LR))
26161 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26163 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26164 /* No out-of-line save/restore routines for GPRs on AIX. */
26165 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
/* 64-bit ABIs: the "0" variants handle LR, the "1" variants do not.  */
26169 if ((sel & SAVRES_REG) == SAVRES_GPR)
26170 prefix = ((sel & SAVRES_SAVE)
26171 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26172 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26173 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26175 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26176 if ((sel & SAVRES_LR))
26177 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
/* Fall back to the target-provided FP prefix/suffix macros.  */
26181 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26182 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26185 else if ((sel & SAVRES_REG) == SAVRES_VR)
26186 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26191 if (DEFAULT_ABI == ABI_DARWIN)
26193 /* The Darwin approach is (slightly) different, in order to be
26194 compatible with code generated by the system toolchain. There is a
26195 single symbol for the start of save sequence, and the code here
26196 embeds an offset into that code on the basis of the first register
26198 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26199 if ((sel & SAVRES_REG) == SAVRES_GPR)
26200 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26201 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26202 (regno - 13) * 4, prefix, regno);
26203 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26204 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26205 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26206 else if ((sel & SAVRES_REG) == SAVRES_VR)
26207 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26208 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
/* Non-Darwin: the simple prefix + regno + suffix scheme.  */
26213 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26215 return savres_routine_name;
26218 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
26219 We are saving/restoring GPRs if GPR is true. */
26222 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
/* Pick the first register of the run, normalized to the 0..31 index used
   by the savres_routine_syms cache.  */
26224 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26225 ? info->first_gp_reg_save
26226 : (sel & SAVRES_REG) == SAVRES_FPR
26227 ? info->first_fp_reg_save - 32
26228 : (sel & SAVRES_REG) == SAVRES_VR
26229 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
/* NOTE(review): the declaration of `select` (apparently a copy of SEL
   used as the second cache index) is on a line not visible in this
   excerpt.  */
26234 /* Don't generate bogus routine names. */
26235 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26236 && regno <= LAST_SAVRES_REGISTER
26237 && select >= 0 && select <= 12);
/* Memoize the SYMBOL_REF so each routine name is interned only once.  */
26239 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26245 name = rs6000_savres_routine_name (regno, sel);
26247 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26248 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26249 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26255 /* Emit a sequence of insns, including a stack tie if needed, for
26256 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26257 reset the stack pointer, but move the base of the frame into
26258 reg UPDT_REGNO for use by out-of-line register restore routines. */
26261 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26262 unsigned updt_regno)
26264 /* If there is nothing to do, don't do anything. */
26265 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26268 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno)
26270 /* This blockage is needed so that sched doesn't decide to move
26271 the sp change before the register restores. */
26272 if (DEFAULT_ABI == ABI_V4)
26273 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26274 GEN_INT (frame_off)));
26276 /* If we are restoring registers out-of-line, we will be using the
26277 "exit" variants of the restore routines, which will reset the
26278 stack for us. But we do need to point updt_reg into the
26279 right place for those routines. */
26280 if (frame_off != 0)
26281 return emit_insn (gen_add3_insn (updt_reg_rtx,
26282 frame_reg_rtx, GEN_INT (frame_off)));
/* frame_off == 0 but a different register: just copy the frame base.  */
26284 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26289 /* Return the register number used as a pointer by out-of-line
26290 save/restore functions. */
26292 static inline unsigned
26293 ptr_regno_for_savres (int sel)
/* 64-bit ABIs use r1 for FPR routines and LR variants, else r12;
   everything else uses r11, except Darwin FPR routines which use r1.  */
26295 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26296 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26297 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26300 /* Construct a parallel rtx describing the effect of a call to an
26301 out-of-line register save/restore routine, and emit the insn
26302 or jump_insn as appropriate. */
26305 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26306 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26307 machine_mode reg_mode, int sel)
/* NOTE(review): the declarations of `i`, `p`, `par`, `sym` and `insn`
   are on lines not visible in this excerpt.  */
26310 int offset, start_reg, end_reg, n_regs, use_reg;
26311 int reg_size = GET_MODE_SIZE (reg_mode);
/* Determine the register run [start_reg, end_reg) handled by the
   routine, from the SAVRES_REG field of SEL.  */
26318 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26319 ? info->first_gp_reg_save
26320 : (sel & SAVRES_REG) == SAVRES_FPR
26321 ? info->first_fp_reg_save
26322 : (sel & SAVRES_REG) == SAVRES_VR
26323 ? info->first_altivec_reg_save
26325 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26327 : (sel & SAVRES_REG) == SAVRES_FPR
26329 : (sel & SAVRES_REG) == SAVRES_VR
26330 ? LAST_ALTIVEC_REGNO + 1
26332 n_regs = end_reg - start_reg;
/* Size the PARALLEL: base elements plus optional LR and VR extras.  */
26333 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26334 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
/* A restore routine that also restores LR is a function return.  */
26337 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26338 RTVEC_ELT (p, offset++) = ret_rtx;
/* The routine call itself clobbers LR and uses the routine symbol.  */
26340 RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, LR_REGNO);
26342 sym = rs6000_savres_routine_sym (info, sel);
26343 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26345 use_reg = ptr_regno_for_savres (sel);
26346 if ((sel & SAVRES_REG) == SAVRES_VR)
26348 /* Vector regs are saved/restored using [reg+reg] addressing. */
26349 RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, use_reg);
26350 RTVEC_ELT (p, offset++)
26351 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26354 RTVEC_ELT (p, offset++)
26355 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
/* One SET per register in the run, describing the memory traffic the
   out-of-line routine performs.  */
26357 for (i = 0; i < end_reg - start_reg; i++)
26358 RTVEC_ELT (p, i + offset)
26359 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26360 frame_reg_rtx, save_area_offset + reg_size * i,
26361 (sel & SAVRES_SAVE) != 0);
26363 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26364 RTVEC_ELT (p, i + offset)
26365 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26367 par = gen_rtx_PARALLEL (VOIDmode, p);
26369 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
/* Emit as a jump (sibling-return) when the routine restores LR.  */
26371 insn = emit_jump_insn (par);
26372 JUMP_LABEL (insn) = ret_rtx;
26375 insn = emit_insn (par);
26379 /* Emit prologue code to store CR fields that need to be saved into REG. This
26380 function should only be called when moving the non-volatile CRs to REG, it
26381 is not a general purpose routine to move the entire set of CRs to REG.
26382 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
26386 rs6000_emit_prologue_move_from_cr (rtx reg)
26388 /* Only the ELFv2 ABI allows storing only selected fields. */
26389 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26391 int i, cr_reg[8], count = 0;
26393 /* Collect CR fields that must be saved. */
26394 for (i = 0; i < 8; i++)
26395 if (save_reg_p (CR0_REGNO + i))
26396 cr_reg[count++] = i;
26398 /* If it's just a single one, use mfcrf. */
/* NOTE(review): the `if (count == 1)` guard implied by the comment above
   falls on a line not visible in this excerpt.  */
26401 rtvec p = rtvec_alloc (1);
26402 rtvec r = rtvec_alloc (2);
/* Operands of the mfcrf unspec: the CR field and its field mask bit.  */
26403 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26404 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26406 = gen_rtx_SET (reg,
26407 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26409 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26413 /* ??? It might be better to handle count == 2 / 3 cases here
26414 as well, using logical operations to combine the values. */
/* Fallback: move the whole CR with the generic prologue pattern.  */
26417 emit_insn (gen_prologue_movesi_from_cr (reg));
26420 /* Return whether the split-stack arg pointer (r12) is used. */
26423 split_stack_arg_pointer_used_p (void)
26425 /* If the pseudo holding the arg pointer is no longer a pseudo,
26426 then the arg pointer is used. */
26427 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26428 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26429 || HARD_REGISTER_P (cfun->machine->split_stack_arg_pointer)))
26432 /* Unfortunately we also need to do some code scanning, since
26433 r12 may have been substituted for the pseudo. */
/* Only the entry block needs scanning: r12 is set up in the prologue.  */
26435 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26436 FOR_BB_INSNS (bb, insn)
26437 if (NONDEBUG_INSN_P (insn))
26439 /* A call destroys r12. */
/* Any explicit use of r12 before it is redefined means it is live.  */
26444 FOR_EACH_INSN_USE (use, insn)
26446 rtx x = DF_REF_REG (use);
26447 if (REG_P (x) && REGNO (x) == 12)
/* A definition of r12 before any use means the incoming value dies.  */
26451 FOR_EACH_INSN_DEF (def, insn)
26453 rtx x = DF_REF_REG (def);
26454 if (REG_P (x) && REGNO (x) == 12)
/* Neither used nor clobbered in the entry block: consult liveness.  */
26458 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26461 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26464 rs6000_global_entry_point_needed_p (void)
26466 /* Only needed for the ELFv2 ABI. */
26467 if (DEFAULT_ABI != ABI_ELFv2)
26470 /* With -msingle-pic-base, we assume the whole program shares the same
26471 TOC, so no global entry point prologues are needed anywhere. */
26472 if (TARGET_SINGLE_PIC_BASE)
26475 /* Ensure we have a global entry point for thunks. ??? We could
26476 avoid that if the target routine doesn't need a global entry point,
26477 but we do not know whether this is the case at this point. */
26478 if (cfun->is_thunk)
26481 /* For regular functions, rs6000_emit_prologue sets this flag if the
26482 routine ever uses the TOC pointer. */
26483 return cfun->machine->r2_setup_needed;
26486 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26488 rs6000_get_separate_components (void)
26490 rs6000_stack_t *info = rs6000_stack_info ();
/* World saves (Darwin save_world) use one monolithic routine, so no
   component can be wrapped separately.  */
26492 if (WORLD_SAVE_P (info))
26495 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26496 && !(info->savres_strategy & REST_MULTIPLE));
26498 /* Component 0 is the save/restore of LR (done via GPR0).
26499 Component 2 is the save of the TOC (GPR2).
26500 Components 13..31 are the save/restore of GPR13..GPR31.
26501 Components 46..63 are the save/restore of FPR14..FPR31. */
26503 cfun->machine->n_components = 64;
26505 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26506 bitmap_clear (components);
26508 int reg_size = TARGET_32BIT ? 4 : 8;
26509 int fp_reg_size = 8;
26511 /* The GPRs we need saved to the frame. */
/* Only inline-saved/restored GPRs can be shrink-wrapped separately.  */
26512 if ((info->savres_strategy & SAVE_INLINE_GPRS)
26513 && (info->savres_strategy & REST_INLINE_GPRS))
26515 int offset = info->gp_save_offset;
26517 offset += info->total_size;
26519 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
/* The slot must be reachable with a 16-bit signed displacement.  */
26521 if (IN_RANGE (offset, -0x8000, 0x7fff)
26522 && save_reg_p (regno))
26523 bitmap_set_bit (components, regno);
26525 offset += reg_size;
26529 /* Don't mess with the hard frame pointer. */
26530 if (frame_pointer_needed)
26531 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26533 /* Don't mess with the fixed TOC register. */
26534 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26535 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26536 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26537 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26539 /* The FPRs we need saved to the frame. */
26540 if ((info->savres_strategy & SAVE_INLINE_FPRS)
26541 && (info->savres_strategy & REST_INLINE_FPRS))
26543 int offset = info->fp_save_offset;
26545 offset += info->total_size;
26547 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26549 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26550 bitmap_set_bit (components, regno);
26552 offset += fp_reg_size;
26556 /* Optimize LR save and restore if we can. This is component 0. Any
26557 out-of-line register save/restore routines need LR. */
26558 if (info->lr_save_p
26559 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26560 && (info->savres_strategy & SAVE_INLINE_GPRS)
26561 && (info->savres_strategy & REST_INLINE_GPRS)
26562 && (info->savres_strategy & SAVE_INLINE_FPRS)
26563 && (info->savres_strategy & REST_INLINE_FPRS)
26564 && (info->savres_strategy & SAVE_INLINE_VRS)
26565 && (info->savres_strategy & REST_INLINE_VRS))
26567 int offset = info->lr_save_offset;
26569 offset += info->total_size;
26570 if (IN_RANGE (offset, -0x8000, 0x7fff))
26571 bitmap_set_bit (components, 0);
26574 /* Optimize saving the TOC. This is component 2. */
26575 if (cfun->machine->save_toc_in_prologue)
26576 bitmap_set_bit (components, 2);
26581 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26583 rs6000_components_for_bb (basic_block bb)
26585 rs6000_stack_t *info = rs6000_stack_info ();
26587 bitmap in = DF_LIVE_IN (bb);
26588 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26589 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26591 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26592 bitmap_clear (components);
26594 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
/* GPR components use the register number itself as the component id.  */
26597 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26598 if (bitmap_bit_p (in, regno)
26599 || bitmap_bit_p (gen, regno)
26600 || bitmap_bit_p (kill, regno))
26601 bitmap_set_bit (components, regno);
/* Likewise FPRs (components 32..63).  */
26604 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26605 if (bitmap_bit_p (in, regno)
26606 || bitmap_bit_p (gen, regno)
26607 || bitmap_bit_p (kill, regno))
26608 bitmap_set_bit (components, regno);
26610 /* The link register. */
26611 if (bitmap_bit_p (in, LR_REGNO)
26612 || bitmap_bit_p (gen, LR_REGNO)
26613 || bitmap_bit_p (kill, LR_REGNO))
26614 bitmap_set_bit (components, 0);
26616 /* The TOC save. */
26617 if (bitmap_bit_p (in, TOC_REGNUM)
26618 || bitmap_bit_p (gen, TOC_REGNUM)
26619 || bitmap_bit_p (kill, TOC_REGNUM))
26620 bitmap_set_bit (components, 2);
26625 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26627 rs6000_disqualify_components (sbitmap components, edge e,
26628 sbitmap edge_components, bool /*is_prologue*/)
26630 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26631 live where we want to place that code. */
26632 if (bitmap_bit_p (edge_components, 0)
26633 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26636 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26637 "on entry to bb %d\n", e->dest->index);
26638 bitmap_clear_bit (components, 0);
26642 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26644 rs6000_emit_prologue_components (sbitmap components)
26646 rs6000_stack_t *info = rs6000_stack_info ();
/* Saves address off the frame pointer if one is in use, otherwise off
   the stack pointer.  */
26647 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26648 ? HARD_FRAME_POINTER_REGNUM
26649 : STACK_POINTER_REGNUM);
26651 machine_mode reg_mode = Pmode;
26652 int reg_size = TARGET_32BIT ? 4 : 8;
26653 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26654 int fp_reg_size = 8;
26656 /* Prologue for LR. */
26657 if (bitmap_bit_p (components, 0))
/* LR is first copied to GPR0, then GPR0 is stored to the LR slot; both
   steps get CFA notes so the unwinder can track LR throughout.  */
26659 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26660 rtx reg = gen_rtx_REG (reg_mode, 0);
26661 rtx_insn *insn = emit_move_insn (reg, lr);
26662 RTX_FRAME_RELATED_P (insn) = 1;
26663 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (reg, lr));
26665 int offset = info->lr_save_offset;
26667 offset += info->total_size;
26669 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26670 RTX_FRAME_RELATED_P (insn) = 1;
/* The CFA note records LR (not GPR0) as the saved register.  */
26671 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26672 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26675 /* Prologue for TOC. */
26676 if (bitmap_bit_p (components, 2))
/* The TOC slot is always at a fixed offset from the stack pointer.  */
26678 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26679 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26680 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26683 /* Prologue for the GPRs. */
26684 int offset = info->gp_save_offset;
26686 offset += info->total_size;
26688 for (int i = info->first_gp_reg_save; i < 32; i++)
26690 if (bitmap_bit_p (components, i))
26692 rtx reg = gen_rtx_REG (reg_mode, i);
26693 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26694 RTX_FRAME_RELATED_P (insn) = 1;
26695 rtx set = copy_rtx (single_set (insn));
26696 add_reg_note (insn, REG_CFA_OFFSET, set);
26699 offset += reg_size;
26702 /* Prologue for the FPRs. */
26703 offset = info->fp_save_offset;
26705 offset += info->total_size;
26707 for (int i = info->first_fp_reg_save; i < 64; i++)
26709 if (bitmap_bit_p (components, i))
26711 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26712 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26713 RTX_FRAME_RELATED_P (insn) = 1;
26714 rtx set = copy_rtx (single_set (insn));
26715 add_reg_note (insn, REG_CFA_OFFSET, set);
26718 offset += fp_reg_size;
26722 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26724 rs6000_emit_epilogue_components (sbitmap components)
26726 rs6000_stack_t *info = rs6000_stack_info ();
26727 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26728 ? HARD_FRAME_POINTER_REGNUM
26729 : STACK_POINTER_REGNUM);
26731 machine_mode reg_mode = Pmode;
26732 int reg_size = TARGET_32BIT ? 4 : 8;
26734 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26735 int fp_reg_size = 8;
26737 /* Epilogue for the FPRs. */
/* Restores run in the reverse order of the prologue: FPRs, GPRs, then
   LR; each load gets a REG_CFA_RESTORE note for the unwinder.  */
26738 int offset = info->fp_save_offset;
26740 offset += info->total_size;
26742 for (int i = info->first_fp_reg_save; i < 64; i++)
26744 if (bitmap_bit_p (components, i))
26746 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26747 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26748 RTX_FRAME_RELATED_P (insn) = 1;
26749 add_reg_note (insn, REG_CFA_RESTORE, reg);
26752 offset += fp_reg_size;
26755 /* Epilogue for the GPRs. */
26756 offset = info->gp_save_offset;
26758 offset += info->total_size;
26760 for (int i = info->first_gp_reg_save; i < 32; i++)
26762 if (bitmap_bit_p (components, i))
26764 rtx reg = gen_rtx_REG (reg_mode, i);
26765 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26766 RTX_FRAME_RELATED_P (insn) = 1;
26767 add_reg_note (insn, REG_CFA_RESTORE, reg);
26770 offset += reg_size;
26773 /* Epilogue for LR. */
26774 if (bitmap_bit_p (components, 0))
26776 int offset = info->lr_save_offset;
26778 offset += info->total_size;
/* LR comes back via GPR0, mirroring the prologue sequence.  */
26780 rtx reg = gen_rtx_REG (reg_mode, 0);
26781 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26783 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26784 insn = emit_move_insn (lr, reg);
26785 RTX_FRAME_RELATED_P (insn) = 1;
26786 add_reg_note (insn, REG_CFA_RESTORE, lr);
26790 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
/* Record which components the shrink-wrapping pass will handle itself,
   so rs6000_emit_prologue/epilogue skip them.  */
26792 rs6000_set_handled_components (sbitmap components)
26794 rs6000_stack_t *info = rs6000_stack_info ();
26796 for (int i = info->first_gp_reg_save; i < 32; i++)
26797 if (bitmap_bit_p (components, i))
26798 cfun->machine->gpr_is_wrapped_separately[i] = true;
26800 for (int i = info->first_fp_reg_save; i < 64; i++)
26801 if (bitmap_bit_p (components, i))
26802 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26804 if (bitmap_bit_p (components, 0))
26805 cfun->machine->lr_is_wrapped_separately = true;
26807 if (bitmap_bit_p (components, 2))
26808 cfun->machine->toc_is_wrapped_separately = true;
26811 /* VRSAVE is a bit vector representing which AltiVec registers
26812 are used. The OS uses this to determine which vector
26813 registers to save on a context switch. We need to save
26814 VRSAVE on the stack frame, add whatever AltiVec registers we
26815 used in this function, and do the corresponding magic in the
26818 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26819 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26821 /* Get VRSAVE into a GPR. */
26822 rtx reg = gen_rtx_REG (SImode, save_regno);
26823 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
/* NOTE(review): the conditional choosing between these two ways of
   reading VRSAVE is on a line not visible in this excerpt.  */
26825 emit_insn (gen_get_vrsave_internal (reg));
26827 emit_insn (gen_rtx_SET (reg, vrsave));
/* Save the caller's VRSAVE value in its frame slot.  */
26830 int offset = info->vrsave_save_offset + frame_off;
26831 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26833 /* Include the registers in the mask. */
26834 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
/* Write the updated mask back to VRSAVE (0 = prologue variant).  */
26836 emit_insn (generate_set_vrsave (reg, info, 0));
26839 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26840 called, it left the arg pointer to the old stack in r29. Otherwise, the
26841 arg pointer is the top of the current frame. */
26843 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26844 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26846 cfun->machine->split_stack_argp_used = true;
/* NOTE(review): the conditional guarding this first arm (apparently
   `if (sp_adjust)`) is on a line not visible in this excerpt.  */
/* Capture the pre-adjustment stack pointer into r12 just before the
   stack adjustment instruction.  */
26850 rtx r12 = gen_rtx_REG (Pmode, 12);
26851 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26852 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26853 emit_insn_before (set_r12, sp_adjust);
26855 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
/* Otherwise compute the frame top into r12 from the frame register.  */
26857 rtx r12 = gen_rtx_REG (Pmode, 12);
26858 if (frame_off == 0)
26859 emit_move_insn (r12, frame_reg_rtx);
26861 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
/* If __morestack was called (cr7 condition), take the old-stack arg
   pointer it left in r29 instead.  */
26866 rtx r12 = gen_rtx_REG (Pmode, 12);
26867 rtx r29 = gen_rtx_REG (Pmode, 29);
26868 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26869 rtx not_more = gen_label_rtx ();
26872 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26873 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26874 gen_rtx_LABEL_REF (VOIDmode, not_more),
26876 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26877 JUMP_LABEL (jump) = not_more;
26878 LABEL_NUSES (not_more) += 1;
26879 emit_move_insn (r12, r29);
26880 emit_label (not_more);
26884 /* Emit function prologue as insns. */
26887 rs6000_emit_prologue (void)
26889 rs6000_stack_t *info = rs6000_stack_info ();
26890 machine_mode reg_mode = Pmode;
26891 int reg_size = TARGET_32BIT ? 4 : 8;
26892 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26893 int fp_reg_size = 8;
26894 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26895 rtx frame_reg_rtx = sp_reg_rtx;
26896 unsigned int cr_save_regno;
26897 rtx cr_save_rtx = NULL_RTX;
26900 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26901 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26902 && call_used_regs[STATIC_CHAIN_REGNUM]);
26903 int using_split_stack = (flag_split_stack
26904 && (lookup_attribute ("no_split_stack",
26905 DECL_ATTRIBUTES (cfun->decl))
26908 /* Offset to top of frame for frame_reg and sp respectively. */
26909 HOST_WIDE_INT frame_off = 0;
26910 HOST_WIDE_INT sp_off = 0;
26911 /* sp_adjust is the stack adjusting instruction, tracked so that the
26912 insn setting up the split-stack arg pointer can be emitted just
26913 prior to it, when r12 is not used here for other purposes. */
26914 rtx_insn *sp_adjust = 0;
26917 /* Track and check usage of r0, r11, r12. */
26918 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26919 #define START_USE(R) do \
26921 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26922 reg_inuse |= 1 << (R); \
26924 #define END_USE(R) do \
26926 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26927 reg_inuse &= ~(1 << (R)); \
26929 #define NOT_INUSE(R) do \
26931 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26934 #define START_USE(R) do {} while (0)
26935 #define END_USE(R) do {} while (0)
26936 #define NOT_INUSE(R) do {} while (0)
26939 if (DEFAULT_ABI == ABI_ELFv2
26940 && !TARGET_SINGLE_PIC_BASE)
26942 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26944 /* With -mminimal-toc we may generate an extra use of r2 below. */
26945 if (TARGET_TOC && TARGET_MINIMAL_TOC
26946 && !constant_pool_empty_p ())
26947 cfun->machine->r2_setup_needed = true;
26951 if (flag_stack_usage_info)
26952 current_function_static_stack_size = info->total_size;
26954 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26956 HOST_WIDE_INT size = info->total_size;
26958 if (crtl->is_leaf && !cfun->calls_alloca)
26960 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
26961 rs6000_emit_probe_stack_range (get_stack_check_protect (),
26962 size - get_stack_check_protect ());
26965 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
26968 if (TARGET_FIX_AND_CONTINUE)
26970 /* gdb on darwin arranges to forward a function from the old
26971 address by modifying the first 5 instructions of the function
26972 to branch to the overriding function. This is necessary to
26973 permit function pointers that point to the old function to
26974 actually forward to the new function. */
26975 emit_insn (gen_nop ());
26976 emit_insn (gen_nop ());
26977 emit_insn (gen_nop ());
26978 emit_insn (gen_nop ());
26979 emit_insn (gen_nop ());
26982 /* Handle world saves specially here. */
26983 if (WORLD_SAVE_P (info))
26990 /* save_world expects lr in r0. */
26991 reg0 = gen_rtx_REG (Pmode, 0);
26992 if (info->lr_save_p)
26994 insn = emit_move_insn (reg0,
26995 gen_rtx_REG (Pmode, LR_REGNO));
26996 RTX_FRAME_RELATED_P (insn) = 1;
26999 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27000 assumptions about the offsets of various bits of the stack
27002 gcc_assert (info->gp_save_offset == -220
27003 && info->fp_save_offset == -144
27004 && info->lr_save_offset == 8
27005 && info->cr_save_offset == 4
27008 && (!crtl->calls_eh_return
27009 || info->ehrd_offset == -432)
27010 && info->vrsave_save_offset == -224
27011 && info->altivec_save_offset == -416);
27013 treg = gen_rtx_REG (SImode, 11);
27014 emit_move_insn (treg, GEN_INT (-info->total_size));
27016 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27017 in R11. It also clobbers R12, so beware! */
27019 /* Preserve CR2 for save_world prologues */
27021 sz += 32 - info->first_gp_reg_save;
27022 sz += 64 - info->first_fp_reg_save;
27023 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27024 p = rtvec_alloc (sz);
27026 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, LR_REGNO);
27027 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27028 gen_rtx_SYMBOL_REF (Pmode,
27030 /* We do floats first so that the instruction pattern matches
27032 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27034 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
27035 info->first_fp_reg_save + i),
27037 info->fp_save_offset + frame_off + 8 * i);
27038 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27040 = gen_frame_store (gen_rtx_REG (V4SImode,
27041 info->first_altivec_reg_save + i),
27043 info->altivec_save_offset + frame_off + 16 * i);
27044 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27046 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27048 info->gp_save_offset + frame_off + reg_size * i);
27050 /* CR register traditionally saved as CR2. */
27052 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
27053 frame_reg_rtx, info->cr_save_offset + frame_off);
27054 /* Explain about use of R0. */
27055 if (info->lr_save_p)
27057 = gen_frame_store (reg0,
27058 frame_reg_rtx, info->lr_save_offset + frame_off);
27059 /* Explain what happens to the stack pointer. */
27061 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27062 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27065 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27066 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27067 treg, GEN_INT (-info->total_size));
27068 sp_off = frame_off = info->total_size;
27071 strategy = info->savres_strategy;
27073 /* For V.4, update stack before we do any saving and set back pointer. */
27074 if (! WORLD_SAVE_P (info)
27076 && (DEFAULT_ABI == ABI_V4
27077 || crtl->calls_eh_return))
27079 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
27080 || !(strategy & SAVE_INLINE_GPRS)
27081 || !(strategy & SAVE_INLINE_VRS));
27082 int ptr_regno = -1;
27083 rtx ptr_reg = NULL_RTX;
27086 if (info->total_size < 32767)
27087 frame_off = info->total_size;
27090 else if (info->cr_save_p
27092 || info->first_fp_reg_save < 64
27093 || info->first_gp_reg_save < 32
27094 || info->altivec_size != 0
27095 || info->vrsave_size != 0
27096 || crtl->calls_eh_return)
27100 /* The prologue won't be saving any regs so there is no need
27101 to set up a frame register to access any frame save area.
27102 We also won't be using frame_off anywhere below, but set
27103 the correct value anyway to protect against future
27104 changes to this function. */
27105 frame_off = info->total_size;
27107 if (ptr_regno != -1)
27109 /* Set up the frame offset to that needed by the first
27110 out-of-line save function. */
27111 START_USE (ptr_regno);
27112 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27113 frame_reg_rtx = ptr_reg;
27114 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27115 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27116 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27117 ptr_off = info->gp_save_offset + info->gp_size;
27118 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27119 ptr_off = info->altivec_save_offset + info->altivec_size;
27120 frame_off = -ptr_off;
27122 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27124 if (REGNO (frame_reg_rtx) == 12)
27126 sp_off = info->total_size;
27127 if (frame_reg_rtx != sp_reg_rtx)
27128 rs6000_emit_stack_tie (frame_reg_rtx, false);
27131 /* If we use the link register, get it into r0. */
27132 if (!WORLD_SAVE_P (info) && info->lr_save_p
27133 && !cfun->machine->lr_is_wrapped_separately)
27135 rtx addr, reg, mem;
27137 reg = gen_rtx_REG (Pmode, 0);
27139 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27140 RTX_FRAME_RELATED_P (insn) = 1;
27142 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27143 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27145 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27146 GEN_INT (info->lr_save_offset + frame_off));
27147 mem = gen_rtx_MEM (Pmode, addr);
27148 /* This should not be of rs6000_sr_alias_set, because of
27149 __builtin_return_address. */
27151 insn = emit_move_insn (mem, reg);
27152 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27153 NULL_RTX, NULL_RTX);
27158 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27159 r12 will be needed by out-of-line gpr save. */
27160 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27161 && !(strategy & (SAVE_INLINE_GPRS
27162 | SAVE_NOINLINE_GPRS_SAVES_LR))
27164 if (!WORLD_SAVE_P (info)
27166 && REGNO (frame_reg_rtx) != cr_save_regno
27167 && !(using_static_chain_p && cr_save_regno == 11)
27168 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27170 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27171 START_USE (cr_save_regno);
27172 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27175 /* Do any required saving of fpr's. If only one or two to save, do
27176 it ourselves. Otherwise, call function. */
27177 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27179 int offset = info->fp_save_offset + frame_off;
27180 for (int i = info->first_fp_reg_save; i < 64; i++)
27183 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
27184 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
27185 sp_off - frame_off);
27187 offset += fp_reg_size;
27190 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27192 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27193 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27194 unsigned ptr_regno = ptr_regno_for_savres (sel);
27195 rtx ptr_reg = frame_reg_rtx;
27197 if (REGNO (frame_reg_rtx) == ptr_regno)
27198 gcc_checking_assert (frame_off == 0);
27201 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27202 NOT_INUSE (ptr_regno);
27203 emit_insn (gen_add3_insn (ptr_reg,
27204 frame_reg_rtx, GEN_INT (frame_off)));
27206 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27207 info->fp_save_offset,
27208 info->lr_save_offset,
27210 rs6000_frame_related (insn, ptr_reg, sp_off,
27211 NULL_RTX, NULL_RTX);
27216 /* Save GPRs. This is done as a PARALLEL if we are using
27217 the store-multiple instructions. */
27218 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27220 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27221 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27222 unsigned ptr_regno = ptr_regno_for_savres (sel);
27223 rtx ptr_reg = frame_reg_rtx;
27224 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27225 int end_save = info->gp_save_offset + info->gp_size;
27228 if (ptr_regno == 12)
27231 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27233 /* Need to adjust r11 (r12) if we saved any FPRs. */
27234 if (end_save + frame_off != 0)
27236 rtx offset = GEN_INT (end_save + frame_off);
27239 frame_off = -end_save;
27241 NOT_INUSE (ptr_regno);
27242 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27244 else if (!ptr_set_up)
27246 NOT_INUSE (ptr_regno);
27247 emit_move_insn (ptr_reg, frame_reg_rtx);
27249 ptr_off = -end_save;
27250 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27251 info->gp_save_offset + ptr_off,
27252 info->lr_save_offset + ptr_off,
27254 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27255 NULL_RTX, NULL_RTX);
27259 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27263 p = rtvec_alloc (32 - info->first_gp_reg_save);
27264 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27266 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27268 info->gp_save_offset + frame_off + reg_size * i);
27269 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27270 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27271 NULL_RTX, NULL_RTX);
27273 else if (!WORLD_SAVE_P (info))
27275 int offset = info->gp_save_offset + frame_off;
27276 for (int i = info->first_gp_reg_save; i < 32; i++)
27279 && !cfun->machine->gpr_is_wrapped_separately[i])
27280 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27281 sp_off - frame_off);
27283 offset += reg_size;
27287 if (crtl->calls_eh_return)
27294 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27295 if (regno == INVALID_REGNUM)
27299 p = rtvec_alloc (i);
27303 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27304 if (regno == INVALID_REGNUM)
27308 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27310 info->ehrd_offset + sp_off + reg_size * (int) i);
27311 RTVEC_ELT (p, i) = set;
27312 RTX_FRAME_RELATED_P (set) = 1;
27315 insn = emit_insn (gen_blockage ());
27316 RTX_FRAME_RELATED_P (insn) = 1;
27317 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27320 /* In AIX ABI we need to make sure r2 is really saved. */
27321 if (TARGET_AIX && crtl->calls_eh_return)
27323 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27324 rtx join_insn, note;
27325 rtx_insn *save_insn;
27326 long toc_restore_insn;
27328 tmp_reg = gen_rtx_REG (Pmode, 11);
27329 tmp_reg_si = gen_rtx_REG (SImode, 11);
27330 if (using_static_chain_p)
27333 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27337 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27338 /* Peek at instruction to which this function returns. If it's
27339 restoring r2, then we know we've already saved r2. We can't
27340 unconditionally save r2 because the value we have will already
27341 be updated if we arrived at this function via a plt call or
27342 toc adjusting stub. */
27343 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27344 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27345 + RS6000_TOC_SAVE_SLOT);
27346 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27347 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27348 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27349 validate_condition_mode (EQ, CCUNSmode);
27350 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27351 emit_insn (gen_rtx_SET (compare_result,
27352 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27353 toc_save_done = gen_label_rtx ();
27354 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27355 gen_rtx_EQ (VOIDmode, compare_result,
27357 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27359 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27360 JUMP_LABEL (jump) = toc_save_done;
27361 LABEL_NUSES (toc_save_done) += 1;
27363 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27364 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27365 sp_off - frame_off);
27367 emit_label (toc_save_done);
27369 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27370 have a CFG that has different saves along different paths.
27371 Move the note to a dummy blockage insn, which describes that
27372 R2 is unconditionally saved after the label. */
27373 /* ??? An alternate representation might be a special insn pattern
27374 containing both the branch and the store. That might let the
27375 code that minimizes the number of DW_CFA_advance opcodes better
27376 freedom in placing the annotations. */
27377 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27379 remove_note (save_insn, note);
27381 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27382 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27383 RTX_FRAME_RELATED_P (save_insn) = 0;
27385 join_insn = emit_insn (gen_blockage ());
27386 REG_NOTES (join_insn) = note;
27387 RTX_FRAME_RELATED_P (join_insn) = 1;
27389 if (using_static_chain_p)
27391 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27398 /* Save CR if we use any that must be preserved. */
27399 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27401 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27402 GEN_INT (info->cr_save_offset + frame_off));
27403 rtx mem = gen_frame_mem (SImode, addr);
27405 /* If we didn't copy cr before, do so now using r0. */
27406 if (cr_save_rtx == NULL_RTX)
27409 cr_save_rtx = gen_rtx_REG (SImode, 0);
27410 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27413 /* Saving CR requires a two-instruction sequence: one instruction
27414 to move the CR to a general-purpose register, and a second
27415 instruction that stores the GPR to memory.
27417 We do not emit any DWARF CFI records for the first of these,
27418 because we cannot properly represent the fact that CR is saved in
27419 a register. One reason is that we cannot express that multiple
27420 CR fields are saved; another reason is that on 64-bit, the size
27421 of the CR register in DWARF (4 bytes) differs from the size of
27422 a general-purpose register.
27424 This means if any intervening instruction were to clobber one of
27425 the call-saved CR fields, we'd have incorrect CFI. To prevent
27426 this from happening, we mark the store to memory as a use of
27427 those CR fields, which prevents any such instruction from being
27428 scheduled in between the two instructions. */
27433 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27434 for (i = 0; i < 8; i++)
27435 if (save_reg_p (CR0_REGNO + i))
27436 crsave_v[n_crsave++]
27437 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27439 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27440 gen_rtvec_v (n_crsave, crsave_v)));
27441 END_USE (REGNO (cr_save_rtx));
27443 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27444 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27445 so we need to construct a frame expression manually. */
27446 RTX_FRAME_RELATED_P (insn) = 1;
27448 /* Update address to be stack-pointer relative, like
27449 rs6000_frame_related would do. */
27450 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27451 GEN_INT (info->cr_save_offset + sp_off));
27452 mem = gen_frame_mem (SImode, addr);
27454 if (DEFAULT_ABI == ABI_ELFv2)
27456 /* In the ELFv2 ABI we generate separate CFI records for each
27457 CR field that was actually saved. They all point to the
27458 same 32-bit stack slot. */
27462 for (i = 0; i < 8; i++)
27463 if (save_reg_p (CR0_REGNO + i))
27466 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27468 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27472 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27473 gen_rtx_PARALLEL (VOIDmode,
27474 gen_rtvec_v (n_crframe, crframe)));
27478 /* In other ABIs, by convention, we use a single CR regnum to
27479 represent the fact that all call-saved CR fields are saved.
27480 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27481 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27482 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27486 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27487 *separate* slots if the routine calls __builtin_eh_return, so
27488 that they can be independently restored by the unwinder. */
27489 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27491 int i, cr_off = info->ehcr_offset;
27494 /* ??? We might get better performance by using multiple mfocrf
27496 crsave = gen_rtx_REG (SImode, 0);
27497 emit_insn (gen_prologue_movesi_from_cr (crsave));
27499 for (i = 0; i < 8; i++)
27500 if (!call_used_regs[CR0_REGNO + i])
27502 rtvec p = rtvec_alloc (2);
27504 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27506 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27508 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27510 RTX_FRAME_RELATED_P (insn) = 1;
27511 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27512 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27513 sp_reg_rtx, cr_off + sp_off));
27515 cr_off += reg_size;
27519 /* If we are emitting stack probes, but allocate no stack, then
27520 just note that in the dump file. */
27521 if (flag_stack_clash_protection
27524 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
27526 /* Update stack and set back pointer unless this is V.4,
27527 for which it was done previously. */
27528 if (!WORLD_SAVE_P (info) && info->push_p
27529 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27531 rtx ptr_reg = NULL;
27534 /* If saving altivec regs we need to be able to address all save
27535 locations using a 16-bit offset. */
27536 if ((strategy & SAVE_INLINE_VRS) == 0
27537 || (info->altivec_size != 0
27538 && (info->altivec_save_offset + info->altivec_size - 16
27539 + info->total_size - frame_off) > 32767)
27540 || (info->vrsave_size != 0
27541 && (info->vrsave_save_offset
27542 + info->total_size - frame_off) > 32767))
27544 int sel = SAVRES_SAVE | SAVRES_VR;
27545 unsigned ptr_regno = ptr_regno_for_savres (sel);
27547 if (using_static_chain_p
27548 && ptr_regno == STATIC_CHAIN_REGNUM)
27550 if (REGNO (frame_reg_rtx) != ptr_regno)
27551 START_USE (ptr_regno);
27552 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27553 frame_reg_rtx = ptr_reg;
27554 ptr_off = info->altivec_save_offset + info->altivec_size;
27555 frame_off = -ptr_off;
27557 else if (REGNO (frame_reg_rtx) == 1)
27558 frame_off = info->total_size;
27559 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27561 if (REGNO (frame_reg_rtx) == 12)
27563 sp_off = info->total_size;
27564 if (frame_reg_rtx != sp_reg_rtx)
27565 rs6000_emit_stack_tie (frame_reg_rtx, false);
27568 /* Set frame pointer, if needed. */
27569 if (frame_pointer_needed)
27571 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27573 RTX_FRAME_RELATED_P (insn) = 1;
27576 /* Save AltiVec registers if needed. Save here because the red zone does
27577 not always include AltiVec registers. */
27578 if (!WORLD_SAVE_P (info)
27579 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27581 int end_save = info->altivec_save_offset + info->altivec_size;
27583 /* Oddly, the vector save/restore functions point r0 at the end
27584 of the save area, then use r11 or r12 to load offsets for
27585 [reg+reg] addressing. */
27586 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27587 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27588 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27590 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27592 if (scratch_regno == 12)
27594 if (end_save + frame_off != 0)
27596 rtx offset = GEN_INT (end_save + frame_off);
27598 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27601 emit_move_insn (ptr_reg, frame_reg_rtx);
27603 ptr_off = -end_save;
27604 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27605 info->altivec_save_offset + ptr_off,
27606 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27607 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27608 NULL_RTX, NULL_RTX);
27609 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27611 /* The oddity mentioned above clobbered our frame reg. */
27612 emit_move_insn (frame_reg_rtx, ptr_reg);
27613 frame_off = ptr_off;
27616 else if (!WORLD_SAVE_P (info)
27617 && info->altivec_size != 0)
27621 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27622 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27624 rtx areg, savereg, mem;
27625 HOST_WIDE_INT offset;
27627 offset = (info->altivec_save_offset + frame_off
27628 + 16 * (i - info->first_altivec_reg_save));
27630 savereg = gen_rtx_REG (V4SImode, i);
27632 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27634 mem = gen_frame_mem (V4SImode,
27635 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27636 GEN_INT (offset)));
27637 insn = emit_insn (gen_rtx_SET (mem, savereg));
27643 areg = gen_rtx_REG (Pmode, 0);
27644 emit_move_insn (areg, GEN_INT (offset));
27646 /* AltiVec addressing mode is [reg+reg]. */
27647 mem = gen_frame_mem (V4SImode,
27648 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27650 /* Rather than emitting a generic move, force use of the stvx
27651 instruction, which we always want on ISA 2.07 (power8) systems.
27652 In particular we don't want xxpermdi/stxvd2x for little
27654 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27657 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27658 areg, GEN_INT (offset));
27662 /* VRSAVE is a bit vector representing which AltiVec registers
27663 are used. The OS uses this to determine which vector
27664 registers to save on a context switch. We need to save
27665 VRSAVE on the stack frame, add whatever AltiVec registers we
27666 used in this function, and do the corresponding magic in the
27669 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27671 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27672 be using r12 as frame_reg_rtx and r11 as the static chain
27673 pointer for nested functions. */
27674 int save_regno = 12;
27675 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27676 && !using_static_chain_p)
27678 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27681 if (using_static_chain_p)
27684 NOT_INUSE (save_regno);
27686 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27689 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27690 if (!TARGET_SINGLE_PIC_BASE
27691 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27692 && !constant_pool_empty_p ())
27693 || (DEFAULT_ABI == ABI_V4
27694 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27695 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27697 /* If emit_load_toc_table will use the link register, we need to save
27698 it. We use R12 for this purpose because emit_load_toc_table
27699 can use register 0. This allows us to use a plain 'blr' to return
27700 from the procedure more often. */
27701 int save_LR_around_toc_setup = (TARGET_ELF
27702 && DEFAULT_ABI == ABI_V4
27704 && ! info->lr_save_p
27705 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27706 if (save_LR_around_toc_setup)
27708 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27709 rtx tmp = gen_rtx_REG (Pmode, 12);
27712 insn = emit_move_insn (tmp, lr);
27713 RTX_FRAME_RELATED_P (insn) = 1;
27715 rs6000_emit_load_toc_table (TRUE);
27717 insn = emit_move_insn (lr, tmp);
27718 add_reg_note (insn, REG_CFA_RESTORE, lr);
27719 RTX_FRAME_RELATED_P (insn) = 1;
27722 rs6000_emit_load_toc_table (TRUE);
27726 if (!TARGET_SINGLE_PIC_BASE
27727 && DEFAULT_ABI == ABI_DARWIN
27728 && flag_pic && crtl->uses_pic_offset_table)
27730 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27731 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27733 /* Save and restore LR locally around this call (in R0). */
27734 if (!info->lr_save_p)
27735 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27737 emit_insn (gen_load_macho_picbase (src));
27739 emit_move_insn (gen_rtx_REG (Pmode,
27740 RS6000_PIC_OFFSET_TABLE_REGNUM),
27743 if (!info->lr_save_p)
27744 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27748 /* If we need to, save the TOC register after doing the stack setup.
27749 Do not emit eh frame info for this save. The unwinder wants info,
27750 conceptually attached to instructions in this function, about
27751 register values in the caller of this function. This R2 may have
27752 already been changed from the value in the caller.
27753 We don't attempt to write accurate DWARF EH frame info for R2
27754 because code emitted by gcc for a (non-pointer) function call
27755 doesn't save and restore R2. Instead, R2 is managed out-of-line
27756 by a linker generated plt call stub when the function resides in
27757 a shared library. This behavior is costly to describe in DWARF,
27758 both in terms of the size of DWARF info and the time taken in the
27759 unwinder to interpret it. R2 changes, apart from the
27760 calls_eh_return case earlier in this function, are handled by
27761 linux-unwind.h frob_update_context. */
27762 if (rs6000_save_toc_in_prologue_p ()
27763 && !cfun->machine->toc_is_wrapped_separately)
27765 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27766 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27769 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27770 if (using_split_stack && split_stack_arg_pointer_used_p ())
27771 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27774 /* Output .extern statements for the save/restore routines we use. */
/* Emit assembler ".extern" directives for the out-of-line FPR
   save/restore helper routines this function will call, i.e. when the
   chosen savres strategy does not save/restore FPRs inline.  FILE is
   the assembly output stream.  */
27777 rs6000_output_savres_externs (FILE *file)
27779 rs6000_stack_t *info = rs6000_stack_info ();
/* With -mdebug=stack, dump the computed frame layout.  */
27781 if (TARGET_DEBUG_STACK)
27782 debug_stack_info (info);
27784 /* Write .extern for any function we will call to save and restore
27786 if (info->first_fp_reg_save < 64
/* The helper routine name is selected by the first FPR saved; FPRs
   are hard regs 32..63, so bias down by 32.  */
27791 int regno = info->first_fp_reg_save - 32;
/* Out-of-line FPR save routine, possibly the variant that also
   saves LR.  */
27793 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27795 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27796 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27797 name = rs6000_savres_routine_name (regno, sel);
27798 fprintf (file, "\t.extern %s\n", name);
/* Out-of-line FPR restore routine.  Note the strategy bit is stated
   negatively ("DOESN'T restore LR"), hence the == 0 test for LR.  */
27800 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27802 bool lr = (info->savres_strategy
27803 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27804 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27805 name = rs6000_savres_routine_name (regno, sel);
27806 fprintf (file, "\t.extern %s\n", name);
27811 /* Write function prologue. */
/* Implements TARGET_ASM_FUNCTION_PROLOGUE.  Emits the textual pieces
   of the prologue that are not represented as RTL: .extern
   declarations for out-of-line save/restore routines, the ELFv2
   global-entry r2 (TOC pointer) setup plus the .localentry marker,
   and the -mprofile-kernel _mcount call sequence.  */
27814 rs6000_output_function_prologue (FILE *file)
/* Thunks do not use the out-of-line save/restore routines.  */
27816 if (!cfun->is_thunk)
27817 rs6000_output_savres_externs (file);
27819 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27820 immediately after the global entry point label. */
27821 if (rs6000_global_entry_point_needed_p ())
27823 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
/* Label the global entry point so the TOC offsets below can be
   expressed relative to it.  */
27825 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27827 if (TARGET_CMODEL != CMODEL_LARGE)
27829 /* In the small and medium code models, we assume the TOC is less
27830 than 2 GB away from the text section, so it can be computed via the
27831 following two-instruction sequence. */
27834 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
/* addis 2,12,(.TOC.-0b)@ha ; addi 2,2,(.TOC.-0b)@l — forms r2 from
   r12, which per the ELFv2 ABI holds the entry address on entry.  */
27835 fprintf (file, "0:\taddis 2,12,.TOC.-");
27836 assemble_name (file, buf);
27837 fprintf (file, "@ha\n");
27838 fprintf (file, "\taddi 2,2,.TOC.-");
27839 assemble_name (file, buf);
27840 fprintf (file, "@l\n");
27844 /* In the large code model, we allow arbitrary offsets between the
27845 TOC and the text section, so we have to load the offset from
27846 memory. The data field is emitted directly before the global
27847 entry point in rs6000_elf_declare_function_name. */
27850 #ifdef HAVE_AS_ENTRY_MARKERS
27851 /* If supported by the linker, emit a marker relocation. If the
27852 total code size of the final executable or shared library
27853 happens to fit into 2 GB after all, the linker will replace
27854 this code sequence with the sequence for the small or medium
27856 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
/* ld 2,LCL-LCF(12) ; add 2,2,12 — load the stored TOC offset and
   add the entry address in r12 to form r2.  */
27858 fprintf (file, "\tld 2,");
27859 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27860 assemble_name (file, buf);
27861 fprintf (file, "-");
27862 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27863 assemble_name (file, buf);
27864 fprintf (file, "(12)\n");
27865 fprintf (file, "\tadd 2,2,12\n");
/* ".localentry name,.-name" records how far the local entry point
   lies past the global entry point.  */
27868 fputs ("\t.localentry\t", file);
27869 assemble_name (file, name);
27870 fputs (",.-", file);
27871 assemble_name (file, name);
27872 fputs ("\n", file);
27875 /* Output -mprofile-kernel code. This needs to be done here instead of
27876 in output_function_profile since it must go after the ELFv2 ABI
27877 local entry point. */
27878 if (TARGET_PROFILE_KERNEL && crtl->profile)
27880 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27881 gcc_assert (!TARGET_32BIT);
/* Save LR in r0 before calling _mcount.  */
27883 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27885 /* In the ELFv2 ABI we have no compiler stack word. It must be
27886 the responsibility of _mcount to preserve the static chain
27887 register if required. */
27888 if (DEFAULT_ABI != ABI_ELFv2
27889 && cfun->static_chain_decl != NULL)
/* Preserve the static chain register across the _mcount call by
   spilling it to 24(r1).  */
27891 asm_fprintf (file, "\tstd %s,24(%s)\n",
27892 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27893 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27894 asm_fprintf (file, "\tld %s,24(%s)\n",
27895 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27898 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
/* Advance the counter so the next function gets fresh LCF/LCL
   labels.  */
27901 rs6000_pic_labelno++;
27904 /* -mprofile-kernel code calls mcount before the function prolog,
27905 so a profiled leaf function should stay a leaf function. */
/* Implements TARGET_KEEP_LEAF_WHEN_PROFILED: nonzero means profiling
   does not force the function out of leaf status, which holds for
   -mprofile-kernel per the comment above.  */
27907 rs6000_keep_leaf_when_profiled ()
27909 return TARGET_PROFILE_KERNEL;
27912 /* Non-zero if vmx regs are restored before the frame pop, zero if
27913 we restore after the pop when possible. */
27914 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27916 /* Restoring cr is a two step process: loading a reg from the frame
27917 save, then moving the reg to cr. For ABI_V4 we must let the
27918 unwinder know that the stack location is no longer valid at or
27919 before the stack deallocation, but we can't emit a cfa_restore for
27920 cr at the stack deallocation like we do for other registers.
27921 The trouble is that it is possible for the move to cr to be
27922 scheduled after the stack deallocation. So say exactly where cr
27923 is located on each of the two insns. */
/* Load the saved CR value from its frame slot at OFFSET from
   FRAME_REG_RTX into GPR REGNO.  When EXIT_FUNC is false (NOTE:
   presumably meaning unwind info is still needed past this point —
   confirm at callers) and the ABI is V.4, annotate the load so the
   unwinder knows CR now lives in the GPR.  */
27926 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27928 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27929 rtx reg = gen_rtx_REG (SImode, regno);
27930 rtx_insn *insn = emit_move_insn (reg, mem);
27932 if (!exit_func && DEFAULT_ABI == ABI_V4)
/* Record "REG = CR" for the unwinder; CR2 conventionally stands in
   for all call-saved CR fields (see restore_saved_cr below).  */
27934 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27935 rtx set = gen_rtx_SET (reg, cr);
27937 add_reg_note (insn, REG_CFA_REGISTER, set);
27938 RTX_FRAME_RELATED_P (insn) = 1;
27943 /* Reload CR from REG. */
/* Move the CR value held in GPR REG back into the condition
   register.  USING_MFCR_MULTIPLE selects a single parallel mtcrf
   restoring several fields at once; otherwise each call-saved CR
   field is restored with its own movsi_to_cr_one insn.  EXIT_FUNC
   suppresses the CFA_RESTORE annotations (as in load_cr_save).  */
27946 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
/* Count how many CR fields actually need restoring.  */
27951 if (using_mfcr_multiple)
27953 for (i = 0; i < 8; i++)
27954 if (save_reg_p (CR0_REGNO + i))
27956 gcc_assert (count);
27959 if (using_mfcr_multiple && count > 1)
/* One PARALLEL: each element sets one CR field from REG via an
   UNSPEC_MOVESI_TO_CR with the field's mask bit.  */
27965 p = rtvec_alloc (count);
27968 for (i = 0; i < 8; i++)
27969 if (save_reg_p (CR0_REGNO + i))
27971 rtvec r = rtvec_alloc (2);
27972 RTVEC_ELT (r, 0) = reg;
/* Mask bit for this CR field; CR0 is the most significant.  */
27973 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27974 RTVEC_ELT (p, ndx) =
27975 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27976 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27979 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27980 gcc_assert (ndx == count);
27982 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27983 CR field separately. */
27984 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27986 for (i = 0; i < 8; i++)
27987 if (save_reg_p (CR0_REGNO + i))
27988 add_reg_note (insn, REG_CFA_RESTORE,
27989 gen_rtx_REG (SImode, CR0_REGNO + i));
27991 RTX_FRAME_RELATED_P (insn) = 1;
/* Non-multiple path: restore each saved CR field one at a time.  */
27995 for (i = 0; i < 8; i++)
27996 if (save_reg_p (CR0_REGNO + i))
27998 rtx insn = emit_insn (gen_movsi_to_cr_one
27999 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28001 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28002 CR field separately, attached to the insn that in fact
28003 restores this particular CR field. */
28004 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28006 add_reg_note (insn, REG_CFA_RESTORE,
28007 gen_rtx_REG (SImode, CR0_REGNO + i));
28009 RTX_FRAME_RELATED_P (insn) = 1;
28013 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
28014 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
28015 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
/* Attach the note to whatever insn performed the final restore.  */
28017 rtx_insn *insn = get_last_insn ();
28018 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28020 add_reg_note (insn, REG_CFA_RESTORE, cr);
28021 RTX_FRAME_RELATED_P (insn) = 1;
28025 /* Like cr, the move to lr instruction can be scheduled after the
28026 stack deallocation, but unlike cr, its stack frame save is still
28027 valid. So we only need to emit the cfa_restore on the correct
28031 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
/* NOTE(review): header/brace lines missing from this chunk; comments only.
   Loads the saved link register from its stack slot at
   FRAME_REG_RTX+OFFSET into GPR REGNO (no CFI notes needed here; see the
   comment above this function).  */
28033 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
28034 rtx reg = gen_rtx_REG (Pmode, regno);
28036 emit_move_insn (reg, mem);
28040 restore_saved_lr (int regno, bool exit_func)
/* NOTE(review): header/brace lines missing from this chunk; comments only.
   Moves GPR REGNO (holding the saved LR value) back into the link
   register, attaching a CFA_RESTORE when shrink-wrapping needs it.  */
28042 rtx reg = gen_rtx_REG (Pmode, regno);
28043 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28044 rtx_insn *insn = emit_move_insn (lr, reg);
28046 if (!exit_func && flag_shrink_wrap)
28048 add_reg_note (insn, REG_CFA_RESTORE, lr);
28049 RTX_FRAME_RELATED_P (insn) = 1;
28054 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
/* NOTE(review): header/brace lines missing from this chunk; comments only.
   Prepends CFA_RESTORE notes for the saved CR field(s) and LR onto the
   CFA_RESTORES note chain and returns the new chain head.  For ELFv2 each
   saved CR field gets its own note; other ABIs use a single CR2 note.  */
28056 if (DEFAULT_ABI == ABI_ELFv2)
28059 for (i = 0; i < 8; i++)
28060 if (save_reg_p (CR0_REGNO + i))
28062 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
28063 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
28067 else if (info->cr_save_p)
28068 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28069 gen_rtx_REG (SImode, CR2_REGNO),
28072 if (info->lr_save_p)
28073 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28074 gen_rtx_REG (Pmode, LR_REGNO),
28076 return cfa_restores;
28079 /* Return true if OFFSET from stack pointer can be clobbered by signals.
28080 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
28081 below stack pointer not clobbered by signals. */
28084 offset_below_red_zone_p (HOST_WIDE_INT offset)
/* NOTE(review): the ABI_V4 arm of this conditional (presumably "? 0",
   since V.4 has no red zone per the comment above) is among the lines
   missing from this chunk — confirm against the original source.
   Returns true if a slot at OFFSET from the stack pointer can be
   clobbered by signal handlers (i.e. lies below the ABI's red zone:
   220 bytes for 32-bit AIX-style ABIs, 288 for 64-bit).  */
28086 return offset < (DEFAULT_ABI == ABI_V4
28088 : TARGET_32BIT ? -220 : -288);
28091 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
28094 emit_cfa_restores (rtx cfa_restores)
28096 rtx_insn *insn = get_last_insn ();
28097 rtx *loc = ®_NOTES (insn);
28100 loc = &XEXP (*loc, 1);
28101 *loc = cfa_restores;
28102 RTX_FRAME_RELATED_P (insn) = 1;
28105 /* Emit function epilogue as insns. */
28108 rs6000_emit_epilogue (int sibcall)
/* NOTE(review): this chunk is missing a large number of interior source
   lines (the embedded original line numbers jump repeatedly): declarations,
   braces, and whole statements are absent.  The code below is kept
   byte-identical to what is visible; only review comments were added.
   Emits the function epilogue as RTL insns; SIBCALL is nonzero when the
   epilogue precedes a sibling call rather than a return.  */
28110 rs6000_stack_t *info;
28111 int restoring_GPRs_inline;
28112 int restoring_FPRs_inline;
28113 int using_load_multiple;
28114 int using_mtcr_multiple;
28115 int use_backchain_to_restore_sp;
28118 HOST_WIDE_INT frame_off = 0;
28119 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28120 rtx frame_reg_rtx = sp_reg_rtx;
28121 rtx cfa_restores = NULL_RTX;
28123 rtx cr_save_reg = NULL_RTX;
28124 machine_mode reg_mode = Pmode;
28125 int reg_size = TARGET_32BIT ? 4 : 8;
28126 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
28127 int fp_reg_size = 8;
28130 unsigned ptr_regno;
/* Pick the restore strategy computed alongside the prologue save
   strategy, and decide which pieces are done inline vs out of line.  */
28132 info = rs6000_stack_info ();
28134 strategy = info->savres_strategy;
28135 using_load_multiple = strategy & REST_MULTIPLE;
28136 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28137 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28138 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
28139 || rs6000_tune == PROCESSOR_PPC603
28140 || rs6000_tune == PROCESSOR_PPC750
28142 /* Restore via the backchain when we have a large frame, since this
28143 is more efficient than an addis, addi pair. The second condition
28144 here will not trigger at the moment; We don't actually need a
28145 frame pointer for alloca, but the generic parts of the compiler
28146 give us one anyway. */
28147 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28148 ? info->lr_save_offset
28150 || (cfun->calls_alloca
28151 && !frame_pointer_needed));
28152 restore_lr = (info->lr_save_p
28153 && (restoring_FPRs_inline
28154 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28155 && (restoring_GPRs_inline
28156 || info->first_fp_reg_save < 64)
28157 && !cfun->machine->lr_is_wrapped_separately);
/* World-save epilogue: emit one big PARALLEL that jumps to the
   out-of-line *rest_world / *eh_rest_world_r10 helper, which restores
   everything and returns to our caller.  */
28160 if (WORLD_SAVE_P (info))
28164 const char *alloc_rname;
28167 /* eh_rest_world_r10 will return to the location saved in the LR
28168 stack slot (which is not likely to be our caller.)
28169 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28170 rest_world is similar, except any R10 parameter is ignored.
28171 The exception-handling stuff that was here in 2.95 is no
28172 longer necessary. */
28175 + 32 - info->first_gp_reg_save
28176 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28177 + 63 + 1 - info->first_fp_reg_save);
28179 strcpy (rname, ((crtl->calls_eh_return) ?
28180 "*eh_rest_world_r10" : "*rest_world"));
28181 alloc_rname = ggc_strdup (rname);
28184 RTVEC_ELT (p, j++) = ret_rtx;
28186 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28187 /* The instruction pattern requires a clobber here;
28188 it is shared with the restVEC helper. */
28189 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (Pmode, 11);
28192 /* CR register traditionally saved as CR2. */
28193 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28195 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28196 if (flag_shrink_wrap)
28198 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28199 gen_rtx_REG (Pmode, LR_REGNO),
28201 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28205 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28207 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28209 = gen_frame_load (reg,
28210 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28211 if (flag_shrink_wrap
28212 && save_reg_p (info->first_gp_reg_save + i))
28213 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28215 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28217 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28219 = gen_frame_load (reg,
28220 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28221 if (flag_shrink_wrap
28222 && save_reg_p (info->first_altivec_reg_save + i))
28223 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28225 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28227 rtx reg = gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
28228 info->first_fp_reg_save + i);
28230 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28231 if (flag_shrink_wrap
28232 && save_reg_p (info->first_fp_reg_save + i))
28233 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28235 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (Pmode, 0);
28236 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 12);
28237 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 7);
28238 RTVEC_ELT (p, j++) = gen_hard_reg_clobber (SImode, 8);
28240 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28241 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28243 if (flag_shrink_wrap)
28245 REG_NOTES (insn) = cfa_restores;
28246 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28247 RTX_FRAME_RELATED_P (insn) = 1;
28252 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28254 frame_off = info->total_size;
28256 /* Restore AltiVec registers if we must do so before adjusting the
28258 if (info->altivec_size != 0
28259 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28260 || (DEFAULT_ABI != ABI_V4
28261 && offset_below_red_zone_p (info->altivec_save_offset))))
28264 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28266 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28267 if (use_backchain_to_restore_sp)
28269 int frame_regno = 11;
28271 if ((strategy & REST_INLINE_VRS) == 0)
28273 /* Of r11 and r12, select the one not clobbered by an
28274 out-of-line restore function for the frame register. */
28275 frame_regno = 11 + 12 - scratch_regno;
28277 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28278 emit_move_insn (frame_reg_rtx,
28279 gen_rtx_MEM (Pmode, sp_reg_rtx));
28282 else if (frame_pointer_needed)
28283 frame_reg_rtx = hard_frame_pointer_rtx;
28285 if ((strategy & REST_INLINE_VRS) == 0)
28287 int end_save = info->altivec_save_offset + info->altivec_size;
28289 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28290 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28292 if (end_save + frame_off != 0)
28294 rtx offset = GEN_INT (end_save + frame_off);
28296 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28299 emit_move_insn (ptr_reg, frame_reg_rtx);
28301 ptr_off = -end_save;
28302 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28303 info->altivec_save_offset + ptr_off,
28304 0, V4SImode, SAVRES_VR);
28308 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28309 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28311 rtx addr, areg, mem, insn;
28312 rtx reg = gen_rtx_REG (V4SImode, i);
28313 HOST_WIDE_INT offset
28314 = (info->altivec_save_offset + frame_off
28315 + 16 * (i - info->first_altivec_reg_save));
28317 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28319 mem = gen_frame_mem (V4SImode,
28320 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28321 GEN_INT (offset)));
28322 insn = gen_rtx_SET (reg, mem);
28326 areg = gen_rtx_REG (Pmode, 0);
28327 emit_move_insn (areg, GEN_INT (offset));
28329 /* AltiVec addressing mode is [reg+reg]. */
28330 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28331 mem = gen_frame_mem (V4SImode, addr);
28333 /* Rather than emitting a generic move, force use of the
28334 lvx instruction, which we always want. In particular we
28335 don't want lxvd2x/xxpermdi for little endian. */
28336 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28339 (void) emit_insn (insn);
28343 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28344 if (((strategy & REST_INLINE_VRS) == 0
28345 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28346 && (flag_shrink_wrap
28347 || (offset_below_red_zone_p
28348 (info->altivec_save_offset
28349 + 16 * (i - info->first_altivec_reg_save))))
28352 rtx reg = gen_rtx_REG (V4SImode, i);
28353 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28357 /* Restore VRSAVE if we must do so before adjusting the stack. */
28358 if (info->vrsave_size != 0
28359 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28360 || (DEFAULT_ABI != ABI_V4
28361 && offset_below_red_zone_p (info->vrsave_save_offset))))
28365 if (frame_reg_rtx == sp_reg_rtx)
28367 if (use_backchain_to_restore_sp)
28369 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28370 emit_move_insn (frame_reg_rtx,
28371 gen_rtx_MEM (Pmode, sp_reg_rtx));
28374 else if (frame_pointer_needed)
28375 frame_reg_rtx = hard_frame_pointer_rtx;
28378 reg = gen_rtx_REG (SImode, 12);
28379 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28380 info->vrsave_save_offset + frame_off));
28382 emit_insn (generate_set_vrsave (reg, info, 1));
28386 /* If we have a large stack frame, restore the old stack pointer
28387 using the backchain. */
28388 if (use_backchain_to_restore_sp)
28390 if (frame_reg_rtx == sp_reg_rtx)
28392 /* Under V.4, don't reset the stack pointer until after we're done
28393 loading the saved registers. */
28394 if (DEFAULT_ABI == ABI_V4)
28395 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28397 insn = emit_move_insn (frame_reg_rtx,
28398 gen_rtx_MEM (Pmode, sp_reg_rtx));
28401 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28402 && DEFAULT_ABI == ABI_V4)
28403 /* frame_reg_rtx has been set up by the altivec restore. */
28407 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28408 frame_reg_rtx = sp_reg_rtx;
28411 /* If we have a frame pointer, we can restore the old stack pointer
28413 else if (frame_pointer_needed)
28415 frame_reg_rtx = sp_reg_rtx;
28416 if (DEFAULT_ABI == ABI_V4)
28417 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28418 /* Prevent reordering memory accesses against stack pointer restore. */
28419 else if (cfun->calls_alloca
28420 || offset_below_red_zone_p (-info->total_size))
28421 rs6000_emit_stack_tie (frame_reg_rtx, true);
28423 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28424 GEN_INT (info->total_size)));
28427 else if (info->push_p
28428 && DEFAULT_ABI != ABI_V4
28429 && !crtl->calls_eh_return)
28431 /* Prevent reordering memory accesses against stack pointer restore. */
28432 if (cfun->calls_alloca
28433 || offset_below_red_zone_p (-info->total_size))
28434 rs6000_emit_stack_tie (frame_reg_rtx, false);
28435 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28436 GEN_INT (info->total_size)));
28439 if (insn && frame_reg_rtx == sp_reg_rtx)
28443 REG_NOTES (insn) = cfa_restores;
28444 cfa_restores = NULL_RTX;
28446 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28447 RTX_FRAME_RELATED_P (insn) = 1;
28450 /* Restore AltiVec registers if we have not done so already. */
28451 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28452 && info->altivec_size != 0
28453 && (DEFAULT_ABI == ABI_V4
28454 || !offset_below_red_zone_p (info->altivec_save_offset)))
28458 if ((strategy & REST_INLINE_VRS) == 0)
28460 int end_save = info->altivec_save_offset + info->altivec_size;
28462 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28463 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28464 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28466 if (end_save + frame_off != 0)
28468 rtx offset = GEN_INT (end_save + frame_off);
28470 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28473 emit_move_insn (ptr_reg, frame_reg_rtx);
28475 ptr_off = -end_save;
28476 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28477 info->altivec_save_offset + ptr_off,
28478 0, V4SImode, SAVRES_VR);
28479 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28481 /* Frame reg was clobbered by out-of-line save. Restore it
28482 from ptr_reg, and if we are calling out-of-line gpr or
28483 fpr restore set up the correct pointer and offset. */
28484 unsigned newptr_regno = 1;
28485 if (!restoring_GPRs_inline)
28487 bool lr = info->gp_save_offset + info->gp_size == 0;
28488 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28489 newptr_regno = ptr_regno_for_savres (sel);
28490 end_save = info->gp_save_offset + info->gp_size;
28492 else if (!restoring_FPRs_inline)
28494 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28495 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28496 newptr_regno = ptr_regno_for_savres (sel);
28497 end_save = info->fp_save_offset + info->fp_size;
28500 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28501 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28503 if (end_save + ptr_off != 0)
28505 rtx offset = GEN_INT (end_save + ptr_off);
28507 frame_off = -end_save;
28509 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28512 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28517 frame_off = ptr_off;
28518 emit_move_insn (frame_reg_rtx, ptr_reg);
28524 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28525 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28527 rtx addr, areg, mem, insn;
28528 rtx reg = gen_rtx_REG (V4SImode, i);
28529 HOST_WIDE_INT offset
28530 = (info->altivec_save_offset + frame_off
28531 + 16 * (i - info->first_altivec_reg_save));
28533 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28535 mem = gen_frame_mem (V4SImode,
28536 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28537 GEN_INT (offset)));
28538 insn = gen_rtx_SET (reg, mem);
28542 areg = gen_rtx_REG (Pmode, 0);
28543 emit_move_insn (areg, GEN_INT (offset));
28545 /* AltiVec addressing mode is [reg+reg]. */
28546 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28547 mem = gen_frame_mem (V4SImode, addr);
28549 /* Rather than emitting a generic move, force use of the
28550 lvx instruction, which we always want. In particular we
28551 don't want lxvd2x/xxpermdi for little endian. */
28552 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28555 (void) emit_insn (insn);
28559 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28560 if (((strategy & REST_INLINE_VRS) == 0
28561 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28562 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28565 rtx reg = gen_rtx_REG (V4SImode, i);
28566 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28570 /* Restore VRSAVE if we have not done so already. */
28571 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28572 && info->vrsave_size != 0
28573 && (DEFAULT_ABI == ABI_V4
28574 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28578 reg = gen_rtx_REG (SImode, 12);
28579 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28580 info->vrsave_save_offset + frame_off));
28582 emit_insn (generate_set_vrsave (reg, info, 1));
28585 /* If we exit by an out-of-line restore function on ABI_V4 then that
28586 function will deallocate the stack, so we don't need to worry
28587 about the unwinder restoring cr from an invalid stack frame
28589 exit_func = (!restoring_FPRs_inline
28590 || (!restoring_GPRs_inline
28591 && info->first_fp_reg_save == 64));
28593 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28594 *separate* slots if the routine calls __builtin_eh_return, so
28595 that they can be independently restored by the unwinder. */
28596 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28598 int i, cr_off = info->ehcr_offset;
28600 for (i = 0; i < 8; i++)
28601 if (!call_used_regs[CR0_REGNO + i])
28603 rtx reg = gen_rtx_REG (SImode, 0);
28604 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28605 cr_off + frame_off));
28607 insn = emit_insn (gen_movsi_to_cr_one
28608 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28610 if (!exit_func && flag_shrink_wrap)
28612 add_reg_note (insn, REG_CFA_RESTORE,
28613 gen_rtx_REG (SImode, CR0_REGNO + i));
28615 RTX_FRAME_RELATED_P (insn) = 1;
28618 cr_off += reg_size;
28622 /* Get the old lr if we saved it. If we are restoring registers
28623 out-of-line, then the out-of-line routines can do this for us. */
28624 if (restore_lr && restoring_GPRs_inline)
28625 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28627 /* Get the old cr if we saved it. */
28628 if (info->cr_save_p)
28630 unsigned cr_save_regno = 12;
28632 if (!restoring_GPRs_inline)
28634 /* Ensure we don't use the register used by the out-of-line
28635 gpr register restore below. */
28636 bool lr = info->gp_save_offset + info->gp_size == 0;
28637 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28638 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28640 if (gpr_ptr_regno == 12)
28641 cr_save_regno = 11;
28642 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28644 else if (REGNO (frame_reg_rtx) == 12)
28645 cr_save_regno = 11;
28647 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28648 info->cr_save_offset + frame_off,
28652 /* Set LR here to try to overlap restores below. */
28653 if (restore_lr && restoring_GPRs_inline)
28654 restore_saved_lr (0, exit_func)
28656 /* Load exception handler data registers, if needed. */
28657 if (crtl->calls_eh_return)
28659 unsigned int i, regno;
28663 rtx reg = gen_rtx_REG (reg_mode, 2);
28664 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28665 frame_off + RS6000_TOC_SAVE_SLOT));
28672 regno = EH_RETURN_DATA_REGNO (i);
28673 if (regno == INVALID_REGNUM)
28676 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28677 info->ehrd_offset + frame_off
28678 + reg_size * (int) i);
28680 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28684 /* Restore GPRs. This is done as a PARALLEL if we are using
28685 the load-multiple instructions. */
28686 if (!restoring_GPRs_inline)
28688 /* We are jumping to an out-of-line function. */
28690 int end_save = info->gp_save_offset + info->gp_size;
28691 bool can_use_exit = end_save == 0;
28692 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28695 /* Emit stack reset code if we need it. */
28696 ptr_regno = ptr_regno_for_savres (sel);
28697 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28699 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28700 else if (end_save + frame_off != 0)
28701 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28702 GEN_INT (end_save + frame_off)));
28703 else if (REGNO (frame_reg_rtx) != ptr_regno)
28704 emit_move_insn (ptr_reg, frame_reg_rtx);
28705 if (REGNO (frame_reg_rtx) == ptr_regno)
28706 frame_off = -end_save;
28708 if (can_use_exit && info->cr_save_p)
28709 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28711 ptr_off = -end_save;
28712 rs6000_emit_savres_rtx (info, ptr_reg,
28713 info->gp_save_offset + ptr_off,
28714 info->lr_save_offset + ptr_off,
28717 else if (using_load_multiple)
28720 p = rtvec_alloc (32 - info->first_gp_reg_save);
28721 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28723 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28725 info->gp_save_offset + frame_off + reg_size * i);
28726 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28730 int offset = info->gp_save_offset + frame_off;
28731 for (i = info->first_gp_reg_save; i < 32; i++)
28734 && !cfun->machine->gpr_is_wrapped_separately[i])
28736 rtx reg = gen_rtx_REG (reg_mode, i);
28737 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28740 offset += reg_size;
28744 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28746 /* If the frame pointer was used then we can't delay emitting
28747 a REG_CFA_DEF_CFA note. This must happen on the insn that
28748 restores the frame pointer, r31. We may have already emitted
28749 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
28750 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28751 be harmless if emitted. */
28752 if (frame_pointer_needed)
28754 insn = get_last_insn ();
28755 add_reg_note (insn, REG_CFA_DEF_CFA,
28756 plus_constant (Pmode, frame_reg_rtx, frame_off));
28757 RTX_FRAME_RELATED_P (insn) = 1;
28760 /* Set up cfa_restores. We always need these when
28761 shrink-wrapping. If not shrink-wrapping then we only need
28762 the cfa_restore when the stack location is no longer valid.
28763 The cfa_restores must be emitted on or before the insn that
28764 invalidates the stack, and of course must not be emitted
28765 before the insn that actually does the restore. The latter
28766 is why it is a bad idea to emit the cfa_restores as a group
28767 on the last instruction here that actually does a restore:
28768 That insn may be reordered with respect to others doing
28770 if (flag_shrink_wrap
28771 && !restoring_GPRs_inline
28772 && info->first_fp_reg_save == 64)
28773 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28775 for (i = info->first_gp_reg_save; i < 32; i++)
28777 && !cfun->machine->gpr_is_wrapped_separately[i])
28779 rtx reg = gen_rtx_REG (reg_mode, i);
28780 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28784 if (!restoring_GPRs_inline
28785 && info->first_fp_reg_save == 64)
28787 /* We are jumping to an out-of-line function. */
28789 emit_cfa_restores (cfa_restores);
28793 if (restore_lr && !restoring_GPRs_inline)
28795 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28796 restore_saved_lr (0, exit_func);
28799 /* Restore fpr's if we need to do it without calling a function. */
28800 if (restoring_FPRs_inline)
28802 int offset = info->fp_save_offset + frame_off;
28803 for (i = info->first_fp_reg_save; i < 64; i++)
28806 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28808 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28809 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28810 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28811 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28815 offset += fp_reg_size;
28819 /* If we saved cr, restore it here. Just those that were used. */
28820 if (info->cr_save_p)
28821 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28823 /* If this is V.4, unwind the stack pointer after all of the loads
28824 have been done, or set up r11 if we are restoring fp out of line. */
28826 if (!restoring_FPRs_inline)
28828 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28829 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28830 ptr_regno = ptr_regno_for_savres (sel);
28833 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28834 if (REGNO (frame_reg_rtx) == ptr_regno)
28837 if (insn && restoring_FPRs_inline)
28841 REG_NOTES (insn) = cfa_restores;
28842 cfa_restores = NULL_RTX;
28844 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28845 RTX_FRAME_RELATED_P (insn) = 1;
28848 if (crtl->calls_eh_return)
28850 rtx sa = EH_RETURN_STACKADJ_RTX;
28851 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28854 if (!sibcall && restoring_FPRs_inline)
28858 /* We can't hang the cfa_restores off a simple return,
28859 since the shrink-wrap code sometimes uses an existing
28860 return. This means there might be a path from
28861 pre-prologue code to this return, and dwarf2cfi code
28862 wants the eh_frame unwinder state to be the same on
28863 all paths to any point. So we need to emit the
28864 cfa_restores before the return. For -m64 we really
28865 don't need epilogue cfa_restores at all, except for
28866 this irritating dwarf2cfi with shrink-wrap
28867 requirement; The stack red-zone means eh_frame info
28868 from the prologue telling the unwinder to restore
28869 from the stack is perfectly good right to the end of
28871 emit_insn (gen_blockage ());
28872 emit_cfa_restores (cfa_restores);
28873 cfa_restores = NULL_RTX;
28876 emit_jump_insn (targetm.gen_simple_return ());
28879 if (!sibcall && !restoring_FPRs_inline)
28881 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28882 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28884 RTVEC_ELT (p, elt++) = ret_rtx;
28886 RTVEC_ELT (p, elt++) = gen_hard_reg_clobber (Pmode, LR_REGNO);
28888 /* We have to restore more than two FP registers, so branch to the
28889 restore function. It will return to our caller. */
28894 if (flag_shrink_wrap)
28895 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28897 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28898 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28899 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28900 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28902 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28904 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28906 RTVEC_ELT (p, elt++)
28907 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28908 if (flag_shrink_wrap
28909 && save_reg_p (info->first_fp_reg_save + i))
28910 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28913 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28919 /* Ensure the cfa_restores are hung off an insn that won't
28920 be reordered above other restores. */
28921 emit_insn (gen_blockage ());
28923 emit_cfa_restores (cfa_restores);
28927 /* Write function epilogue. */
28930 rs6000_output_function_epilogue (FILE *file)
28933 macho_branch_islands ();
28936 rtx_insn *insn = get_last_insn ();
28937 rtx_insn *deleted_debug_label = NULL;
28939 /* Mach-O doesn't support labels at the end of objects, so if
28940 it looks like we might want one, take special action.
28942 First, collect any sequence of deleted debug labels. */
28945 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28947 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28948 notes only, instead set their CODE_LABEL_NUMBER to -1,
28949 otherwise there would be code generation differences
28950 in between -g and -g0. */
28951 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28952 deleted_debug_label = insn;
28953 insn = PREV_INSN (insn);
28956 /* Second, if we have:
28959 then this needs to be detected, so skip past the barrier. */
28961 if (insn && BARRIER_P (insn))
28962 insn = PREV_INSN (insn);
28964 /* Up to now we've only seen notes or barriers. */
28969 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28970 /* Trailing label: <barrier>. */
28971 fputs ("\tnop\n", file);
28974 /* Lastly, see if we have a completely empty function body. */
28975 while (insn && ! INSN_P (insn))
28976 insn = PREV_INSN (insn);
28977 /* If we don't find any insns, we've got an empty function body;
28978 I.e. completely empty - without a return or branch. This is
28979 taken as the case where a function body has been removed
28980 because it contains an inline __builtin_unreachable(). GCC
28981 states that reaching __builtin_unreachable() means UB so we're
28982 not obliged to do anything special; however, we want
28983 non-zero-sized function bodies. To meet this, and help the
28984 user out, let's trap the case. */
28986 fputs ("\ttrap\n", file);
28989 else if (deleted_debug_label)
28990 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28991 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28992 CODE_LABEL_NUMBER (insn) = -1;
28996 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28999 We don't output a traceback table if -finhibit-size-directive was
29000 used. The documentation for -finhibit-size-directive reads
29001 ``don't output a @code{.size} assembler directive, or anything
29002 else that would cause trouble if the function is split in the
29003 middle, and the two halves are placed at locations far apart in
29004 memory.'' The traceback table has this property, since it
29005 includes the offset from the start of the function to the
29006 traceback table itself.
29008 System V.4 Powerpc's (and the embedded ABI derived from it) use a
29009 different traceback table. */
29010 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29011 && ! flag_inhibit_size_directive
29012 && rs6000_traceback != traceback_none && !cfun->is_thunk)
29014 const char *fname = NULL;
29015 const char *language_string = lang_hooks.name;
29016 int fixed_parms = 0, float_parms = 0, parm_info = 0;
29018 int optional_tbtab;
29019 rs6000_stack_t *info = rs6000_stack_info ();
29021 if (rs6000_traceback == traceback_full)
29022 optional_tbtab = 1;
29023 else if (rs6000_traceback == traceback_part)
29024 optional_tbtab = 0;
29026 optional_tbtab = !optimize_size && !TARGET_ELF;
29028 if (optional_tbtab)
29030 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29031 while (*fname == '.') /* V.4 encodes . in the name */
29034 /* Need label immediately before tbtab, so we can compute
29035 its offset from the function start. */
29036 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29037 ASM_OUTPUT_LABEL (file, fname);
29040 /* The .tbtab pseudo-op can only be used for the first eight
29041 expressions, since it can't handle the possibly variable
29042 length fields that follow. However, if you omit the optional
29043 fields, the assembler outputs zeros for all optional fields
29044 anyways, giving each variable length field is minimum length
29045 (as defined in sys/debug.h). Thus we cannot use the .tbtab
29046 pseudo-op at all. */
29048 /* An all-zero word flags the start of the tbtab, for debuggers
29049 that have to find it by searching forward from the entry
29050 point or from the current pc. */
29051 fputs ("\t.long 0\n", file);
29053 /* Tbtab format type. Use format type 0. */
29054 fputs ("\t.byte 0,", file);
29056 /* Language type. Unfortunately, there does not seem to be any
29057 official way to discover the language being compiled, so we
29058 use language_string.
29059 C is 0. Fortran is 1. Ada is 3. C++ is 9.
29060 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29061 a number, so for now use 9. LTO, Go, D, and JIT aren't assigned
29062 numbers either, so for now use 0. */
29064 || ! strcmp (language_string, "GNU GIMPLE")
29065 || ! strcmp (language_string, "GNU Go")
29066 || ! strcmp (language_string, "GNU D")
29067 || ! strcmp (language_string, "libgccjit"))
29069 else if (! strcmp (language_string, "GNU F77")
29070 || lang_GNU_Fortran ())
29072 else if (! strcmp (language_string, "GNU Ada"))
29074 else if (lang_GNU_CXX ()
29075 || ! strcmp (language_string, "GNU Objective-C++"))
29077 else if (! strcmp (language_string, "GNU Java"))
29079 else if (! strcmp (language_string, "GNU Objective-C"))
29082 gcc_unreachable ();
29083 fprintf (file, "%d,", i);
29085 /* 8 single bit fields: global linkage (not set for C extern linkage,
29086 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29087 from start of procedure stored in tbtab, internal function, function
29088 has controlled storage, function has no toc, function uses fp,
29089 function logs/aborts fp operations. */
29090 /* Assume that fp operations are used if any fp reg must be saved. */
29091 fprintf (file, "%d,",
29092 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
29094 /* 6 bitfields: function is interrupt handler, name present in
29095 proc table, function calls alloca, on condition directives
29096 (controls stack walks, 3 bits), saves condition reg, saves
29098 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29099 set up as a frame pointer, even when there is no alloca call. */
29100 fprintf (file, "%d,",
29101 ((optional_tbtab << 6)
29102 | ((optional_tbtab & frame_pointer_needed) << 5)
29103 | (info->cr_save_p << 1)
29104 | (info->lr_save_p)));
29106 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29108 fprintf (file, "%d,",
29109 (info->push_p << 7) | (64 - info->first_fp_reg_save));
29111 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29112 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29114 if (optional_tbtab)
29116 /* Compute the parameter info from the function decl argument
29119 int next_parm_info_bit = 31;
29121 for (decl = DECL_ARGUMENTS (current_function_decl);
29122 decl; decl = DECL_CHAIN (decl))
29124 rtx parameter = DECL_INCOMING_RTL (decl);
29125 machine_mode mode = GET_MODE (parameter);
29127 if (REG_P (parameter))
29129 if (SCALAR_FLOAT_MODE_P (mode))
29152 gcc_unreachable ();
29155 /* If only one bit will fit, don't or in this entry. */
29156 if (next_parm_info_bit > 0)
29157 parm_info |= (bits << (next_parm_info_bit - 1));
29158 next_parm_info_bit -= 2;
29162 fixed_parms += ((GET_MODE_SIZE (mode)
29163 + (UNITS_PER_WORD - 1))
29165 next_parm_info_bit -= 1;
29171 /* Number of fixed point parameters. */
29172 /* This is actually the number of words of fixed point parameters; thus
29173 an 8 byte struct counts as 2; and thus the maximum value is 8. */
29174 fprintf (file, "%d,", fixed_parms);
29176 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29178 /* This is actually the number of fp registers that hold parameters;
29179 and thus the maximum value is 13. */
29180 /* Set parameters on stack bit if parameters are not in their original
29181 registers, regardless of whether they are on the stack? Xlc
29182 seems to set the bit when not optimizing. */
29183 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29185 if (optional_tbtab)
29187 /* Optional fields follow. Some are variable length. */
29189 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
29190 float, 11 double float. */
29191 /* There is an entry for each parameter in a register, in the order
29192 that they occur in the parameter list. Any intervening arguments
29193 on the stack are ignored. If the list overflows a long (max
29194 possible length 34 bits) then completely leave off all elements
29196 /* Only emit this long if there was at least one parameter. */
29197 if (fixed_parms || float_parms)
29198 fprintf (file, "\t.long %d\n", parm_info);
29200 /* Offset from start of code to tb table. */
29201 fputs ("\t.long ", file);
29202 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29203 RS6000_OUTPUT_BASENAME (file, fname);
29205 rs6000_output_function_entry (file, fname);
29208 /* Interrupt handler mask. */
29209 /* Omit this long, since we never set the interrupt handler bit
29212 /* Number of CTL (controlled storage) anchors. */
29213 /* Omit this long, since the has_ctl bit is never set above. */
29215 /* Displacement into stack of each CTL anchor. */
29216 /* Omit this list of longs, because there are no CTL anchors. */
29218 /* Length of function name. */
29221 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29223 /* Function name. */
29224 assemble_string (fname, strlen (fname));
29226 /* Register for alloca automatic storage; this is always reg 31.
29227 Only emit this if the alloca bit was set above. */
29228 if (frame_pointer_needed)
29229 fputs ("\t.byte 31\n", file);
29231 fputs ("\t.align 2\n", file);
29235 /* Arrange to define .LCTOC1 label, if not already done. */
29239 if (!toc_initialized)
29241 switch_to_section (toc_section);
29242 switch_to_section (current_function_section ());
29247 /* -fsplit-stack support. */
29249 /* A SYMBOL_REF for __morestack. */
29250 static GTY(()) rtx morestack_ref;
29253 gen_add3_const (rtx rt, rtx ra, long c)
29256 return gen_adddi3 (rt, ra, GEN_INT (c));
29258 return gen_addsi3 (rt, ra, GEN_INT (c));
29261 /* Emit -fsplit-stack prologue, which goes before the regular function
29262 prologue (at local entry point in the case of ELFv2). */
29265 rs6000_expand_split_stack_prologue (void)
29267 rs6000_stack_t *info = rs6000_stack_info ();
29268 unsigned HOST_WIDE_INT allocate;
29269 long alloc_hi, alloc_lo;
29270 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29273 gcc_assert (flag_split_stack && reload_completed);
29278 if (global_regs[29])
29280 error ("%qs uses register r29", "-fsplit-stack");
29281 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29282 "conflicts with %qD", global_regs_decl[29]);
29285 allocate = info->total_size;
29286 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29288 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29291 if (morestack_ref == NULL_RTX)
29293 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29294 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29295 | SYMBOL_FLAG_FUNCTION);
29298 r0 = gen_rtx_REG (Pmode, 0);
29299 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29300 r12 = gen_rtx_REG (Pmode, 12);
29301 emit_insn (gen_load_split_stack_limit (r0));
29302 /* Always emit two insns here to calculate the requested stack,
29303 so that the linker can edit them when adjusting size for calling
29304 non-split-stack code. */
29305 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29306 alloc_lo = -allocate - alloc_hi;
29309 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29311 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29313 emit_insn (gen_nop ());
29317 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29318 emit_insn (gen_nop ());
29321 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29322 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29323 ok_label = gen_label_rtx ();
29324 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29325 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29326 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29328 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29329 JUMP_LABEL (insn) = ok_label;
29330 /* Mark the jump as very likely to be taken. */
29331 add_reg_br_prob_note (insn, profile_probability::very_likely ());
29333 lr = gen_rtx_REG (Pmode, LR_REGNO);
29334 insn = emit_move_insn (r0, lr);
29335 RTX_FRAME_RELATED_P (insn) = 1;
29336 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29337 RTX_FRAME_RELATED_P (insn) = 1;
29339 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29340 const0_rtx, const0_rtx));
29341 call_fusage = NULL_RTX;
29342 use_reg (&call_fusage, r12);
29343 /* Say the call uses r0, even though it doesn't, to stop regrename
29344 from twiddling with the insns saving lr, trashing args for cfun.
29345 The insns restoring lr are similarly protected by making
29346 split_stack_return use r0. */
29347 use_reg (&call_fusage, r0);
29348 add_function_usage_to (insn, call_fusage);
29349 /* Indicate that this function can't jump to non-local gotos. */
29350 make_reg_eh_region_note_nothrow_nononlocal (insn);
29351 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29352 insn = emit_move_insn (lr, r0);
29353 add_reg_note (insn, REG_CFA_RESTORE, lr);
29354 RTX_FRAME_RELATED_P (insn) = 1;
29355 emit_insn (gen_split_stack_return ());
29357 emit_label (ok_label);
29358 LABEL_NUSES (ok_label) = 1;
29361 /* Return the internal arg pointer used for function incoming
29362 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29363 to copy it to a pseudo in order for it to be preserved over calls
29364 and suchlike. We'd really like to use a pseudo here for the
29365 internal arg pointer but data-flow analysis is not prepared to
29366 accept pseudos as live at the beginning of a function. */
29369 rs6000_internal_arg_pointer (void)
29371 if (flag_split_stack
29372 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29376 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29380 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29381 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29383 /* Put the pseudo initialization right after the note at the
29384 beginning of the function. */
29385 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29386 gen_rtx_REG (Pmode, 12));
29387 push_topmost_sequence ();
29388 emit_insn_after (pat, get_insns ());
29389 pop_topmost_sequence ();
29391 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29392 FIRST_PARM_OFFSET (current_function_decl));
29393 return copy_to_reg (ret);
29395 return virtual_incoming_args_rtx;
29398 /* We may have to tell the dataflow pass that the split stack prologue
29399 is initializing a register. */
29402 rs6000_live_on_entry (bitmap regs)
29404 if (flag_split_stack)
29405 bitmap_set_bit (regs, 12);
29408 /* Emit -fsplit-stack dynamic stack allocation space check. */
29411 rs6000_split_stack_space_check (rtx size, rtx label)
29413 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29414 rtx limit = gen_reg_rtx (Pmode);
29415 rtx requested = gen_reg_rtx (Pmode);
29416 rtx cmp = gen_reg_rtx (CCUNSmode);
29419 emit_insn (gen_load_split_stack_limit (limit));
29420 if (CONST_INT_P (size))
29421 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29424 size = force_reg (Pmode, size);
29425 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29427 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29428 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29429 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29430 gen_rtx_LABEL_REF (VOIDmode, label),
29432 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29433 JUMP_LABEL (jump) = label;
29436 /* A C compound statement that outputs the assembler code for a thunk
29437 function, used to implement C++ virtual function calls with
29438 multiple inheritance. The thunk acts as a wrapper around a virtual
29439 function, adjusting the implicit object parameter before handing
29440 control off to the real function.
29442 First, emit code to add the integer DELTA to the location that
29443 contains the incoming first argument. Assume that this argument
29444 contains a pointer, and is the one used to pass the `this' pointer
29445 in C++. This is the incoming argument *before* the function
29446 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29447 values of all other incoming arguments.
29449 After the addition, emit code to jump to FUNCTION, which is a
29450 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29451 not touch the return address. Hence returning from FUNCTION will
29452 return to whoever called the current `thunk'.
29454 The effect must be as if FUNCTION had been called directly with the
29455 adjusted first argument. This macro is responsible for emitting
29456 all of the code for a thunk function; output_function_prologue()
29457 and output_function_epilogue() are not invoked.
29459 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29460 been extracted from it.) It might possibly be useful on some
29461 targets, but probably not.
29463 If you do not define this macro, the target-independent code in the
29464 C++ frontend will generate a less efficient heavyweight thunk that
29465 calls FUNCTION instead of jumping to it. The generic approach does
29466 not support varargs. */
29469 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29470 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29473 rtx this_rtx, funexp;
29476 reload_completed = 1;
29477 epilogue_completed = 1;
29479 /* Mark the end of the (empty) prologue. */
29480 emit_note (NOTE_INSN_PROLOGUE_END);
29482 /* Find the "this" pointer. If the function returns a structure,
29483 the structure return pointer is in r3. */
29484 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29485 this_rtx = gen_rtx_REG (Pmode, 4);
29487 this_rtx = gen_rtx_REG (Pmode, 3);
29489 /* Apply the constant offset, if required. */
29491 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29493 /* Apply the offset from the vtable, if required. */
29496 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29497 rtx tmp = gen_rtx_REG (Pmode, 12);
29499 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29500 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29502 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29503 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29507 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29509 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29511 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29514 /* Generate a tail call to the target function. */
29515 if (!TREE_USED (function))
29517 assemble_external (function);
29518 TREE_USED (function) = 1;
29520 funexp = XEXP (DECL_RTL (function), 0);
29521 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29524 if (MACHOPIC_INDIRECT)
29525 funexp = machopic_indirect_call_target (funexp);
29528 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29529 generate sibcall RTL explicitly. */
29530 insn = emit_call_insn (
29531 gen_rtx_PARALLEL (VOIDmode,
29533 gen_rtx_CALL (VOIDmode,
29534 funexp, const0_rtx),
29535 gen_rtx_USE (VOIDmode, const0_rtx),
29536 simple_return_rtx)));
29537 SIBLING_CALL_P (insn) = 1;
29540 /* Run just enough of rest_of_compilation to get the insns emitted.
29541 There's not really enough bulk here to make other passes such as
29542 instruction scheduling worth while. Note that use_thunk calls
29543 assemble_start_function and assemble_end_function. */
29544 insn = get_insns ();
29545 shorten_branches (insn);
29546 final_start_function (insn, file, 1);
29547 final (insn, file, 1);
29548 final_end_function ();
29550 reload_completed = 0;
29551 epilogue_completed = 0;
29554 /* A quick summary of the various types of 'constant-pool tables'
29557 Target Flags Name One table per
29558 AIX (none) AIX TOC object file
29559 AIX -mfull-toc AIX TOC object file
29560 AIX -mminimal-toc AIX minimal TOC translation unit
29561 SVR4/EABI (none) SVR4 SDATA object file
29562 SVR4/EABI -fpic SVR4 pic object file
29563 SVR4/EABI -fPIC SVR4 PIC translation unit
29564 SVR4/EABI -mrelocatable EABI TOC function
29565 SVR4/EABI -maix AIX TOC object file
29566 SVR4/EABI -maix -mminimal-toc
29567 AIX minimal TOC translation unit
29569 Name Reg. Set by entries contains:
29570 made by addrs? fp? sum?
29572 AIX TOC 2 crt0 as Y option option
29573 AIX minimal TOC 30 prolog gcc Y Y option
29574 SVR4 SDATA 13 crt0 gcc N Y N
29575 SVR4 pic 30 prolog ld Y not yet N
29576 SVR4 PIC 30 prolog gcc Y option option
29577 EABI TOC 30 prolog gcc Y option option
29581 /* Hash functions for the hash table. */
29584 rs6000_hash_constant (rtx k)
29586 enum rtx_code code = GET_CODE (k);
29587 machine_mode mode = GET_MODE (k);
29588 unsigned result = (code << 3) ^ mode;
29589 const char *format;
29592 format = GET_RTX_FORMAT (code);
29593 flen = strlen (format);
29599 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29601 case CONST_WIDE_INT:
29604 flen = CONST_WIDE_INT_NUNITS (k);
29605 for (i = 0; i < flen; i++)
29606 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29611 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29621 for (; fidx < flen; fidx++)
29622 switch (format[fidx])
29627 const char *str = XSTR (k, fidx);
29628 len = strlen (str);
29629 result = result * 613 + len;
29630 for (i = 0; i < len; i++)
29631 result = result * 613 + (unsigned) str[i];
29636 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29640 result = result * 613 + (unsigned) XINT (k, fidx);
29643 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29644 result = result * 613 + (unsigned) XWINT (k, fidx);
29648 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29649 result = result * 613 + (unsigned) (XWINT (k, fidx)
29656 gcc_unreachable ();
29663 toc_hasher::hash (toc_hash_struct *thc)
29665 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29668 /* Compare H1 and H2 for equivalence. */
29671 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29676 if (h1->key_mode != h2->key_mode)
29679 return rtx_equal_p (r1, r2);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

/* NOTE(review): the macro parameter NAME is never expanded; the body
   refers to a variable literally spelled `name' in the caller's scope.
   Every current use site passes a local called `name', so this works,
   but the macro cannot be invoked with any other argument name.  */
#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
  || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
  || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
  || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
  || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.
   Returns NAME unchanged when it contains no '$' (or only a leading
   one, which is left alone).  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
29725 rs6000_output_symbol_ref (FILE *file, rtx x)
29727 const char *name = XSTR (x, 0);
29729 /* Currently C++ toc references to vtables can be emitted before it
29730 is decided whether the vtable is public or private. If this is
29731 the case, then the linker will eventually complain that there is
29732 a reference to an unknown section. Thus, for vtables only,
29733 we emit the TOC reference to reference the identifier and not the
29735 if (VTABLE_NAME_P (name))
29737 RS6000_OUTPUT_BASENAME (file, name);
29740 assemble_name (file, name);
29743 /* Output a TOC entry. We derive the entry name from what is being
29747 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29750 const char *name = buf;
29752 HOST_WIDE_INT offset = 0;
29754 gcc_assert (!TARGET_NO_TOC);
29756 /* When the linker won't eliminate them, don't output duplicate
29757 TOC entries (this happens on AIX if there is any kind of TOC,
29758 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29760 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29762 struct toc_hash_struct *h;
29764 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29765 time because GGC is not initialized at that point. */
29766 if (toc_hash_table == NULL)
29767 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29769 h = ggc_alloc<toc_hash_struct> ();
29771 h->key_mode = mode;
29772 h->labelno = labelno;
29774 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29775 if (*found == NULL)
29777 else /* This is indeed a duplicate.
29778 Set this label equal to that label. */
29780 fputs ("\t.set ", file);
29781 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29782 fprintf (file, "%d,", labelno);
29783 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29784 fprintf (file, "%d\n", ((*found)->labelno));
29787 if (TARGET_XCOFF && SYMBOL_REF_P (x)
29788 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29789 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29791 fputs ("\t.set ", file);
29792 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29793 fprintf (file, "%d,", labelno);
29794 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29795 fprintf (file, "%d\n", ((*found)->labelno));
29802 /* If we're going to put a double constant in the TOC, make sure it's
29803 aligned properly when strict alignment is on. */
29804 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29805 && STRICT_ALIGNMENT
29806 && GET_MODE_BITSIZE (mode) >= 64
29807 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29808 ASM_OUTPUT_ALIGN (file, 3);
29811 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29813 /* Handle FP constants specially. Note that if we have a minimal
29814 TOC, things we put here aren't actually in the TOC, so we can allow
29816 if (CONST_DOUBLE_P (x)
29817 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29818 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29822 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29823 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29825 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29829 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29830 fputs (DOUBLE_INT_ASM_OP, file);
29832 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29833 k[0] & 0xffffffff, k[1] & 0xffffffff,
29834 k[2] & 0xffffffff, k[3] & 0xffffffff);
29835 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29836 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29837 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29838 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29839 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29844 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29845 fputs ("\t.long ", file);
29847 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29848 k[0] & 0xffffffff, k[1] & 0xffffffff,
29849 k[2] & 0xffffffff, k[3] & 0xffffffff);
29850 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29851 k[0] & 0xffffffff, k[1] & 0xffffffff,
29852 k[2] & 0xffffffff, k[3] & 0xffffffff);
29856 else if (CONST_DOUBLE_P (x)
29857 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29861 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29862 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29864 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29868 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29869 fputs (DOUBLE_INT_ASM_OP, file);
29871 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29872 k[0] & 0xffffffff, k[1] & 0xffffffff);
29873 fprintf (file, "0x%lx%08lx\n",
29874 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29875 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29880 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29881 fputs ("\t.long ", file);
29883 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29884 k[0] & 0xffffffff, k[1] & 0xffffffff);
29885 fprintf (file, "0x%lx,0x%lx\n",
29886 k[0] & 0xffffffff, k[1] & 0xffffffff);
29890 else if (CONST_DOUBLE_P (x)
29891 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29895 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29896 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29898 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29902 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29903 fputs (DOUBLE_INT_ASM_OP, file);
29905 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29906 if (WORDS_BIG_ENDIAN)
29907 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29909 fprintf (file, "0x%lx\n", l & 0xffffffff);
29914 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29915 fputs ("\t.long ", file);
29917 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29918 fprintf (file, "0x%lx\n", l & 0xffffffff);
29922 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
29924 unsigned HOST_WIDE_INT low;
29925 HOST_WIDE_INT high;
29927 low = INTVAL (x) & 0xffffffff;
29928 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29930 /* TOC entries are always Pmode-sized, so when big-endian
29931 smaller integer constants in the TOC need to be padded.
29932 (This is still a win over putting the constants in
29933 a separate constant pool, because then we'd have
29934 to have both a TOC entry _and_ the actual constant.)
29936 For a 32-bit target, CONST_INT values are loaded and shifted
29937 entirely within `low' and can be stored in one TOC entry. */
29939 /* It would be easy to make this work, but it doesn't now. */
29940 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29942 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29945 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29946 high = (HOST_WIDE_INT) low >> 32;
29952 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29953 fputs (DOUBLE_INT_ASM_OP, file);
29955 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29956 (long) high & 0xffffffff, (long) low & 0xffffffff);
29957 fprintf (file, "0x%lx%08lx\n",
29958 (long) high & 0xffffffff, (long) low & 0xffffffff);
29963 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29965 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29966 fputs ("\t.long ", file);
29968 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29969 (long) high & 0xffffffff, (long) low & 0xffffffff);
29970 fprintf (file, "0x%lx,0x%lx\n",
29971 (long) high & 0xffffffff, (long) low & 0xffffffff);
29975 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29976 fputs ("\t.long ", file);
29978 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29979 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29985 if (GET_CODE (x) == CONST)
29987 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29988 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
29990 base = XEXP (XEXP (x, 0), 0);
29991 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29994 switch (GET_CODE (base))
29997 name = XSTR (base, 0);
30001 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
30002 CODE_LABEL_NUMBER (XEXP (base, 0)));
30006 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
30010 gcc_unreachable ();
30013 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30014 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
30017 fputs ("\t.tc ", file);
30018 RS6000_OUTPUT_BASENAME (file, name);
30021 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
30023 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
30025 /* Mark large TOC symbols on AIX with [TE] so they are mapped
30026 after other TOC symbols, reducing overflow of small TOC access
30027 to [TC] symbols. */
30028 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
30029 ? "[TE]," : "[TC],", file);
30032 /* Currently C++ toc references to vtables can be emitted before it
30033 is decided whether the vtable is public or private. If this is
30034 the case, then the linker will eventually complain that there is
30035 a TOC reference to an unknown section. Thus, for vtables only,
30036 we emit the TOC reference to reference the symbol and not the
30038 if (VTABLE_NAME_P (name))
30040 RS6000_OUTPUT_BASENAME (file, name);
30042 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
30043 else if (offset > 0)
30044 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
30047 output_addr_const (file, x);
30050 if (TARGET_XCOFF && SYMBOL_REF_P (base))
30052 switch (SYMBOL_REF_TLS_MODEL (base))
30056 case TLS_MODEL_LOCAL_EXEC:
30057 fputs ("@le", file);
30059 case TLS_MODEL_INITIAL_EXEC:
30060 fputs ("@ie", file);
30062 /* Use global-dynamic for local-dynamic. */
30063 case TLS_MODEL_GLOBAL_DYNAMIC:
30064 case TLS_MODEL_LOCAL_DYNAMIC:
30066 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30067 fputs ("\t.tc .", file);
30068 RS6000_OUTPUT_BASENAME (file, name);
30069 fputs ("[TC],", file);
30070 output_addr_const (file, x);
30071 fputs ("@m", file);
30074 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
30149 /* Generate a unique section name for FILENAME for a section type
30150 represented by SECTION_DESC. Output goes into BUF.
30152 SECTION_DESC can be any string, as long as it is different for each
30153 possible section type.
30155 We name the section in the same manner as xlc. The name begins with an
30156 underscore followed by the filename (after stripping any leading directory
30157 names) with the last period replaced by the string SECTION_DESC. If
30158 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30162 rs6000_gen_section_name (char **buf, const char *filename,
30163 const char *section_desc)
30165 const char *q, *after_last_slash, *last_period = 0;
30169 after_last_slash = filename;
30170 for (q = filename; *q; q++)
30173 after_last_slash = q + 1;
30174 else if (*q == '.')
30178 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30179 *buf = (char *) xmalloc (len);
30184 for (q = after_last_slash; *q; q++)
30186 if (q == last_period)
30188 strcpy (p, section_desc);
30189 p += strlen (section_desc);
30193 else if (ISALNUM (*q))
30197 if (last_period == 0)
30198 strcpy (p, section_desc);
30203 /* Emit profile function. */
/* Emit RTL that calls the profiling routine (_mcount) in the style the
   current ABI expects.  LABELNO names the per-call-site counter label on
   ABIs that keep profile counters; unused otherwise.  */
30206 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30208 /* Non-standard profiling for kernels, which just saves LR then calls
30209 _mcount without worrying about arg saves. The idea is to change
30210 the function prologue as little as possible as it isn't easy to
30211 account for arg save/restore code added just for _mcount. */
30212 if (TARGET_PROFILE_KERNEL)
30215 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30217 #ifndef NO_PROFILE_COUNTERS
30218 # define NO_PROFILE_COUNTERS 0
/* Without profile counters, _mcount takes no argument.  */
30220 if (NO_PROFILE_COUNTERS)
30221 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30222 LCT_NORMAL, VOIDmode);
/* With counters: materialize the internal "LP<labelno>" label as a
   SYMBOL_REF and pass its address to _mcount.  */
30226 const char *label_name;
30229 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30230 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30231 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30233 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30234 LCT_NORMAL, VOIDmode, fun, Pmode);
30237 else if (DEFAULT_ABI == ABI_DARWIN)
30239 const char *mcount_name = RS6000_MCOUNT;
30240 int caller_addr_regno = LR_REGNO;
30242 /* Be conservative and always set this, at least for now. */
30243 crtl->uses_pic_offset_table = 1;
30246 /* For PIC code, set up a stub and collect the caller's address
30247 from r0, which is where the prologue puts it. */
30248 if (MACHOPIC_INDIRECT
30249 && crtl->uses_pic_offset_table)
30250 caller_addr_regno = 0;
/* Darwin _mcount receives the caller's address as its argument.  */
30252 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30253 LCT_NORMAL, VOIDmode,
30254 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30258 /* Write function profiler code. */
/* Textually emit the 32-bit V.4-style profiler prologue for function
   LABELNO into FILE: save LR, load the address of the "LP" counter label
   into r0/r12 (method depends on PIC level and secure-PLT), then branch
   to _mcount.  64-bit code paths only warn — see below.
   NOTE(review): elided listing; some lines are missing from this view.  */
30261 output_function_profiler (FILE *file, int labelno)
30265 switch (DEFAULT_ABI)
30268 gcc_unreachable ();
/* 64-bit with this ABI variant is not supported for profiling.  */
30273 warning (0, "no profiling of 64-bit code for this ABI");
30276 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
/* Preserve the link register before calling _mcount.  */
30277 fprintf (file, "\tmflr %s\n", reg_names[0]);
30278 if (NO_PROFILE_COUNTERS)
30280 asm_fprintf (file, "\tstw %s,4(%s)\n",
30281 reg_names[0], reg_names[1]);
30283 else if (TARGET_SECURE_PLT && flag_pic)
/* Secure PLT: compute the label address PC-relative via a bcl/mflr
   sequence (or the ppc476 link-stack thunk).  */
30285 if (TARGET_LINK_STACK)
30288 get_ppc476_thunk_name (name);
30289 asm_fprintf (file, "\tbl %s\n", name);
30292 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30293 asm_fprintf (file, "\tstw %s,4(%s)\n",
30294 reg_names[0], reg_names[1]);
30295 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30296 asm_fprintf (file, "\taddis %s,%s,",
30297 reg_names[12], reg_names[12]);
30298 assemble_name (file, buf);
30299 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30300 assemble_name (file, buf);
30301 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
/* Small-model PIC: fetch the label address through the GOT.  */
30303 else if (flag_pic == 1)
30305 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30306 asm_fprintf (file, "\tstw %s,4(%s)\n",
30307 reg_names[0], reg_names[1]);
30308 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30309 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30310 assemble_name (file, buf);
30311 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
/* Large-model PIC: embed the label's PC-relative offset as data and
   add it to the address obtained from mflr.  */
30313 else if (flag_pic > 1)
30315 asm_fprintf (file, "\tstw %s,4(%s)\n",
30316 reg_names[0], reg_names[1]);
30317 /* Now, we need to get the address of the label. */
30318 if (TARGET_LINK_STACK)
30321 get_ppc476_thunk_name (name);
30322 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30323 assemble_name (file, buf);
30324 fputs ("-.\n1:", file);
30325 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30326 asm_fprintf (file, "\taddi %s,%s,4\n",
30327 reg_names[11], reg_names[11]);
30331 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30332 assemble_name (file, buf);
30333 fputs ("-.\n1:", file);
30334 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30336 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30337 reg_names[0], reg_names[11]);
30338 asm_fprintf (file, "\tadd %s,%s,%s\n",
30339 reg_names[0], reg_names[0], reg_names[11]);
/* Non-PIC: load the label address with an absolute lis/la pair.  */
30343 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30344 assemble_name (file, buf);
30345 fputs ("@ha\n", file);
30346 asm_fprintf (file, "\tstw %s,4(%s)\n",
30347 reg_names[0], reg_names[1]);
30348 asm_fprintf (file, "\tla %s,", reg_names[0]);
30349 assemble_name (file, buf);
30350 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30353 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30354 fprintf (file, "\tbl %s%s\n",
30355 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30361 /* Don't do anything, done in output_profile_hook (). */
30368 /* The following variable value is the last issued insn. */
30370 static rtx_insn *last_scheduled_insn;
30372 /* The following variable helps to balance issuing of load and
30373 store instructions */
/* Sign convention (see rs6000_sched_reorder2): negative values count
   stores issued this cycle, positive values count loads; 0 = balanced.  */
30375 static int load_store_pendulum;
30377 /* The following variable helps pair divide insns during scheduling. */
30378 static int divide_cnt;
30379 /* The following variable helps pair and alternate vector and vector load
30380 insns during scheduling. */
/* State machine values are documented in power9_sched_reorder2.  */
30381 static int vec_pairing;
30384 /* Power4 load update and store update instructions are cracked into a
30385 load or store and an integer insn which are executed in the same cycle.
30386 Branches have their own dispatch slot which does not count against the
30387 GCC issue rate, but it changes the program flow so there are no other
30388 instructions to issue in this cycle. */
/* Core of the TARGET_SCHED_VARIABLE_ISSUE hook: given that INSN was just
   issued with MORE issue slots remaining, compute how many slots are left.
   The result is cached in cached_can_issue_more and also returned.  */
30391 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30393 last_scheduled_insn = insn;
/* USE/CLOBBER markers consume no issue slot.  */
30394 if (GET_CODE (PATTERN (insn)) == USE
30395 || GET_CODE (PATTERN (insn)) == CLOBBER)
30397 cached_can_issue_more = more;
30398 return cached_can_issue_more;
/* An insn that ends the current dispatch group leaves no slots.  */
30401 if (insn_terminates_group_p (insn, current_group))
30403 cached_can_issue_more = 0;
30404 return cached_can_issue_more;
30407 /* If no reservation, but reach here */
30408 if (recog_memoized (insn) < 0)
/* Group-based machines: microcoded insns close the group, cracked insns
   occupy two slots, everything else one.  */
30411 if (rs6000_sched_groups)
30413 if (is_microcoded_insn (insn))
30414 cached_can_issue_more = 0;
30415 else if (is_cracked_insn (insn))
30416 cached_can_issue_more = more > 2 ? more - 2 : 0;
30418 cached_can_issue_more = more - 1;
30420 return cached_can_issue_more;
30423 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
30426 cached_can_issue_more = more - 1;
30427 return cached_can_issue_more;
/* TARGET_SCHED_VARIABLE_ISSUE hook: wrapper around rs6000_variable_issue_1
   that optionally traces the result to the scheduler dump STREAM.  */
30431 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30433 int r = rs6000_variable_issue_1 (insn, more);
30435 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30439 /* Adjust the cost of a scheduling dependency. Return the new cost of
30440 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* TARGET_SCHED_ADJUST_COST hook.  DEP_TYPE is one of REG_DEP_TRUE /
   REG_DEP_OUTPUT / REG_DEP_ANTI.  Returned values are latency overrides
   tuned per processor; COST is returned unchanged when no special case
   applies.  NOTE(review): elided listing — several case labels, returns
   and braces are missing from this view; statement order is authoritative
   only in the full source.  */
30443 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30446 enum attr_type attr_type;
/* Bail out to the default cost when either insn has no recognizable
   reservation.  */
30448 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30455 /* Data dependency; DEP_INSN writes a register that INSN reads
30456 some cycles later. */
30458 /* Separate a load from a narrower, dependent store. */
30459 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
30460 && GET_CODE (PATTERN (insn)) == SET
30461 && GET_CODE (PATTERN (dep_insn)) == SET
30462 && MEM_P (XEXP (PATTERN (insn), 1))
30463 && MEM_P (XEXP (PATTERN (dep_insn), 0))
30464 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30465 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30468 attr_type = get_attr_type (insn);
30473 /* Tell the first scheduling pass about the latency between
30474 a mtctr and bctr (and mtlr and br/blr). The first
30475 scheduling pass will not know about this latency since
30476 the mtctr instruction, which has the latency associated
30477 to it, will be generated by reload. */
30480 /* Leave some extra cycles between a compare and its
30481 dependent branch, to inhibit expensive mispredicts. */
30482 if ((rs6000_tune == PROCESSOR_PPC603
30483 || rs6000_tune == PROCESSOR_PPC604
30484 || rs6000_tune == PROCESSOR_PPC604e
30485 || rs6000_tune == PROCESSOR_PPC620
30486 || rs6000_tune == PROCESSOR_PPC630
30487 || rs6000_tune == PROCESSOR_PPC750
30488 || rs6000_tune == PROCESSOR_PPC7400
30489 || rs6000_tune == PROCESSOR_PPC7450
30490 || rs6000_tune == PROCESSOR_PPCE5500
30491 || rs6000_tune == PROCESSOR_PPCE6500
30492 || rs6000_tune == PROCESSOR_POWER4
30493 || rs6000_tune == PROCESSOR_POWER5
30494 || rs6000_tune == PROCESSOR_POWER7
30495 || rs6000_tune == PROCESSOR_POWER8
30496 || rs6000_tune == PROCESSOR_POWER9
30497 || rs6000_tune == PROCESSOR_CELL)
30498 && recog_memoized (dep_insn)
30499 && (INSN_CODE (dep_insn) >= 0))
30501 switch (get_attr_type (dep_insn))
30504 case TYPE_FPCOMPARE:
30505 case TYPE_CR_LOGICAL:
/* Record-form (dot) insns also set CR and feed the branch.  */
30509 if (get_attr_dot (dep_insn) == DOT_YES)
30514 if (get_attr_dot (dep_insn) == DOT_YES
30515 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
/* Power6: stores whose value comes from a just-computed fixed-point
   result need extra latency unless the store-data bypass applies.  */
30526 if ((rs6000_tune == PROCESSOR_POWER6)
30527 && recog_memoized (dep_insn)
30528 && (INSN_CODE (dep_insn) >= 0))
30531 if (GET_CODE (PATTERN (insn)) != SET)
30532 /* If this happens, we have to extend this to schedule
30533 optimally. Return default for now. */
30536 /* Adjust the cost for the case where the value written
30537 by a fixed point operation is used as the address
30538 gen value on a store. */
30539 switch (get_attr_type (dep_insn))
30544 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30545 return get_attr_sign_extend (dep_insn)
30546 == SIGN_EXTEND_YES ? 6 : 4;
30551 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30552 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30562 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30570 if (get_attr_update (dep_insn) == UPDATE_YES
30571 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30577 if (! rs6000_store_data_bypass_p (dep_insn, insn))
/* Divides are very long-latency producers: 45 (32-bit) / 57 cycles.  */
30583 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30584 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
/* Power6: same idea for loads whose address generation consumes the
   dependent fixed-point result (see set_to_load_agen).  */
30594 if ((rs6000_tune == PROCESSOR_POWER6)
30595 && recog_memoized (dep_insn)
30596 && (INSN_CODE (dep_insn) >= 0))
30599 /* Adjust the cost for the case where the value written
30600 by a fixed point instruction is used within the address
30601 gen portion of a subsequent load(u)(x) */
30602 switch (get_attr_type (dep_insn))
30607 if (set_to_load_agen (dep_insn, insn))
30608 return get_attr_sign_extend (dep_insn)
30609 == SIGN_EXTEND_YES ? 6 : 4;
30614 if (set_to_load_agen (dep_insn, insn))
30615 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30625 if (set_to_load_agen (dep_insn, insn))
30633 if (get_attr_update (dep_insn) == UPDATE_YES
30634 && set_to_load_agen (dep_insn, insn))
30640 if (set_to_load_agen (dep_insn, insn))
30646 if (set_to_load_agen (dep_insn, insn))
30647 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30657 if ((rs6000_tune == PROCESSOR_POWER6)
30658 && get_attr_update (insn) == UPDATE_NO
30659 && recog_memoized (dep_insn)
30660 && (INSN_CODE (dep_insn) >= 0)
30661 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30668 /* Fall out to return default cost. */
30672 case REG_DEP_OUTPUT:
30673 /* Output dependency; DEP_INSN writes a register that INSN writes some
30675 if ((rs6000_tune == PROCESSOR_POWER6)
30676 && recog_memoized (dep_insn)
30677 && (INSN_CODE (dep_insn) >= 0))
30679 attr_type = get_attr_type (insn);
30684 case TYPE_FPSIMPLE:
30685 if (get_attr_type (dep_insn) == TYPE_FP
30686 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30690 if (get_attr_update (insn) == UPDATE_NO
30691 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30698 /* Fall through, no cost for output dependency. */
30702 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30707 gcc_unreachable ();
30713 /* Debug version of rs6000_adjust_cost. */
30716 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30717 int cost, unsigned int dw)
30719 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30727 default: dep = "unknown depencency"; break;
30728 case REG_DEP_TRUE: dep = "data dependency"; break;
30729 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30730 case REG_DEP_ANTI: dep = "anti depencency"; break;
30734 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30735 "%s, insn:\n", ret, cost, dep);
30743 /* The function returns a true if INSN is microcoded.
30744 Return false otherwise. */
/* Microcoded insns expand into multiple internal operations and (on
   group-dispatch machines) terminate the dispatch group.  */
30747 is_microcoded_insn (rtx_insn *insn)
/* Ignore non-insns and the USE/CLOBBER scheduling markers.  */
30749 if (!insn || !NONDEBUG_INSN_P (insn)
30750 || GET_CODE (PATTERN (insn)) == USE
30751 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Cell exposes this directly as an insn attribute.  */
30754 if (rs6000_tune == PROCESSOR_CELL)
30755 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
/* Power4/Power5: update-form loads with sign-extend, indexed
   update-form loads/stores, and mfcr are microcoded.  */
30757 if (rs6000_sched_groups
30758 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30760 enum attr_type type = get_attr_type (insn);
30761 if ((type == TYPE_LOAD
30762 && get_attr_update (insn) == UPDATE_YES
30763 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30764 || ((type == TYPE_LOAD || type == TYPE_STORE)
30765 && get_attr_update (insn) == UPDATE_YES
30766 && get_attr_indexed (insn) == INDEXED_YES)
30767 || type == TYPE_MFCR)
30774 /* The function returns true if INSN is cracked into 2 instructions
30775 by the processor (and therefore occupies 2 issue slots). */
30778 is_cracked_insn (rtx_insn *insn)
/* Ignore non-insns and the USE/CLOBBER scheduling markers.  */
30780 if (!insn || !NONDEBUG_INSN_P (insn)
30781 || GET_CODE (PATTERN (insn)) == USE
30782 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Only the group-dispatch Power4/Power5 pipelines crack insns; the
   predicate below enumerates the cracked forms from the pipeline docs.  */
30785 if (rs6000_sched_groups
30786 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30788 enum attr_type type = get_attr_type (insn);
30789 if ((type == TYPE_LOAD
30790 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30791 && get_attr_update (insn) == UPDATE_NO)
30792 || (type == TYPE_LOAD
30793 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30794 && get_attr_update (insn) == UPDATE_YES
30795 && get_attr_indexed (insn) == INDEXED_NO)
30796 || (type == TYPE_STORE
30797 && get_attr_update (insn) == UPDATE_YES
30798 && get_attr_indexed (insn) == INDEXED_NO)
30799 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30800 && get_attr_update (insn) == UPDATE_YES)
30801 || (type == TYPE_CR_LOGICAL
30802 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
30803 || (type == TYPE_EXTS
30804 && get_attr_dot (insn) == DOT_YES)
30805 || (type == TYPE_SHIFT
30806 && get_attr_dot (insn) == DOT_YES
30807 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30808 || (type == TYPE_MUL
30809 && get_attr_dot (insn) == DOT_YES)
30810 || type == TYPE_DIV
30811 || (type == TYPE_INSERT
30812 && get_attr_size (insn) == SIZE_32))
30819 /* The function returns true if INSN can be issued only from
30820 the branch slot. */
30823 is_branch_slot_insn (rtx_insn *insn)
/* Ignore non-insns and the USE/CLOBBER scheduling markers.  */
30825 if (!insn || !NONDEBUG_INSN_P (insn)
30826 || GET_CODE (PATTERN (insn)) == USE
30827 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Only group-dispatch machines reserve a dedicated branch slot.  */
30830 if (rs6000_sched_groups)
30832 enum attr_type type = get_attr_type (insn);
30833 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30841 /* The function returns true if out_inst sets a value that is
30842 used in the address generation computation of in_insn */
30844 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30846 rtx out_set, in_set;
30848 /* For performance reasons, only handle the simple case where
30849 both loads are a single_set. */
30850 out_set = single_set (out_insn);
30853 in_set = single_set (in_insn);
/* True when OUT_INSN's destination register appears anywhere in
   IN_INSN's source (which contains the load's address expression).  */
30855 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30861 /* Try to determine base/offset/size parts of the given MEM.
30862 Return true if successful, false if all the values couldn't
30865 This function only looks for REG or REG+CONST address forms.
30866 REG+REG address form will return false. */
30869 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30870 HOST_WIDE_INT *size)
/* NOTE(review): the paren-less "if MEM_SIZE_KNOWN_P (mem)" is legal C
   only because the macro's expansion is itself parenthesized; do not
   imitate this style.  */
30873 if MEM_SIZE_KNOWN_P (mem)
30874 *size = MEM_SIZE (mem);
30878 addr_rtx = (XEXP (mem, 0));
/* For PRE_MODIFY, the effective address is the modified expression.  */
30879 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30880 addr_rtx = XEXP (addr_rtx, 1);
/* Peel nested (plus (plus reg const) const) forms, accumulating the
   constant displacement into *OFFSET.  */
30883 while (GET_CODE (addr_rtx) == PLUS
30884 && CONST_INT_P (XEXP (addr_rtx, 1)))
30886 *offset += INTVAL (XEXP (addr_rtx, 1));
30887 addr_rtx = XEXP (addr_rtx, 0);
/* Whatever remains must be a plain base register (REG+REG fails here).  */
30889 if (!REG_P (addr_rtx))
30896 /* The function returns true if the target storage location of
30897 mem1 is adjacent to the target storage location of mem2 */
30898 /* Return 1 if memory locations are adjacent. */
30901 adjacent_mem_locations (rtx mem1, rtx mem2)
30904 HOST_WIDE_INT off1, size1, off2, size2;
30906 if (get_memref_parts (mem1, ®1, &off1, &size1)
30907 && get_memref_parts (mem2, ®2, &off2, &size2))
30908 return ((REGNO (reg1) == REGNO (reg2))
30909 && ((off1 + size1 == off2)
30910 || (off2 + size2 == off1)));
30915 /* This function returns true if it can be determined that the two MEM
30916 locations overlap by at least 1 byte based on base reg/offset/size. */
30919 mem_locations_overlap (rtx mem1, rtx mem2)
30922 HOST_WIDE_INT off1, size1, off2, size2;
30924 if (get_memref_parts (mem1, ®1, &off1, &size1)
30925 && get_memref_parts (mem2, ®2, &off2, &size2))
30926 return ((REGNO (reg1) == REGNO (reg2))
30927 && (((off1 <= off2) && (off1 + size1 > off2))
30928 || ((off2 <= off1) && (off2 + size2 > off1))));
30933 /* A C statement (sans semicolon) to update the integer scheduling
30934 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30935 INSN earlier, reduce the priority to execute INSN later. Do not
30936 define this macro if you do not need to adjust the scheduling
30937 priorities of insns. */
/* TARGET_SCHED_ADJUST_PRIORITY hook.  Returns the (possibly modified)
   priority for INSN.  */
30940 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30942 rtx load_mem, str_mem;
30943 /* On machines (like the 750) which have asymmetric integer units,
30944 where one integer unit can do multiply and divides and the other
30945 can't, reduce the priority of multiply/divide so it is scheduled
30946 before other integer operations. */
30949 if (! INSN_P (insn))
30952 if (GET_CODE (PATTERN (insn)) == USE)
30955 switch (rs6000_tune) {
30956 case PROCESSOR_PPC750:
30957 switch (get_attr_type (insn))
/* Debug trace of the pre-adjustment priority (elided branch).  */
30964 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30965 priority, priority);
30966 if (priority >= 0 && priority < 0x01000000)
/* Dispatch-group machines: boost insns restricted to the first slot,
   per the -msched-restricted-insns-priority setting.  */
30973 if (insn_must_be_first_in_group (insn)
30974 && reload_completed
30975 && current_sched_info->sched_max_insns_priority
30976 && rs6000_sched_restricted_insns_priority)
30979 /* Prioritize insns that can be dispatched only in the first
30981 if (rs6000_sched_restricted_insns_priority == 1)
30982 /* Attach highest priority to insn. This means that in
30983 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30984 precede 'priority' (critical path) considerations. */
30985 return current_sched_info->sched_max_insns_priority;
30986 else if (rs6000_sched_restricted_insns_priority == 2)
30987 /* Increase priority of insn by a minimal amount. This means that in
30988 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30989 considerations precede dispatch-slot restriction considerations. */
30990 return (priority + 1);
30993 if (rs6000_tune == PROCESSOR_POWER6
30994 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30995 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30996 /* Attach highest priority to insn if the scheduler has just issued two
30997 stores and this instruction is a load, or two loads and this instruction
30998 is a store. Power6 wants loads and stores scheduled alternately
31000 return current_sched_info->sched_max_insns_priority;
31005 /* Return true if the instruction is nonpipelined on the Cell. */
31007 is_nonpipeline_insn (rtx_insn *insn)
31009 enum attr_type type;
/* Ignore non-insns and the USE/CLOBBER scheduling markers.  */
31010 if (!insn || !NONDEBUG_INSN_P (insn)
31011 || GET_CODE (PATTERN (insn)) == USE
31012 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Multiplies, divides, square roots and the move-from special
   registers block the pipeline on Cell.  */
31015 type = get_attr_type (insn);
31016 if (type == TYPE_MUL
31017 || type == TYPE_DIV
31018 || type == TYPE_SDIV
31019 || type == TYPE_DDIV
31020 || type == TYPE_SSQRT
31021 || type == TYPE_DSQRT
31022 || type == TYPE_MFCR
31023 || type == TYPE_MFCRF
31024 || type == TYPE_MFJMPR)
31032 /* Return how many instructions the machine can issue per cycle. */
/* TARGET_SCHED_ISSUE_RATE hook; the value depends on rs6000_tune.
   NOTE(review): the per-group "return N;" lines are elided from this
   listing — each run of case labels below shares one return.  */
31035 rs6000_issue_rate (void)
31037 /* Unless scheduling for register pressure, use issue rate of 1 for
31038 first scheduling pass to decrease degradation. */
31039 if (!reload_completed && !flag_sched_pressure)
31042 switch (rs6000_tune) {
31043 case PROCESSOR_RS64A:
31044 case PROCESSOR_PPC601: /* ? */
31045 case PROCESSOR_PPC7450:
31047 case PROCESSOR_PPC440:
31048 case PROCESSOR_PPC603:
31049 case PROCESSOR_PPC750:
31050 case PROCESSOR_PPC7400:
31051 case PROCESSOR_PPC8540:
31052 case PROCESSOR_PPC8548:
31053 case PROCESSOR_CELL:
31054 case PROCESSOR_PPCE300C2:
31055 case PROCESSOR_PPCE300C3:
31056 case PROCESSOR_PPCE500MC:
31057 case PROCESSOR_PPCE500MC64:
31058 case PROCESSOR_PPCE5500:
31059 case PROCESSOR_PPCE6500:
31060 case PROCESSOR_TITAN:
31062 case PROCESSOR_PPC476:
31063 case PROCESSOR_PPC604:
31064 case PROCESSOR_PPC604e:
31065 case PROCESSOR_PPC620:
31066 case PROCESSOR_PPC630:
31068 case PROCESSOR_POWER4:
31069 case PROCESSOR_POWER5:
31070 case PROCESSOR_POWER6:
31071 case PROCESSOR_POWER7:
31073 case PROCESSOR_POWER8:
31075 case PROCESSOR_POWER9:
31082 /* Return how many instructions to look ahead for better insn
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook.  */
31086 rs6000_use_sched_lookahead (void)
31088 switch (rs6000_tune)
31090 case PROCESSOR_PPC8540:
31091 case PROCESSOR_PPC8548:
/* Cell looks ahead only after register allocation.  */
31094 case PROCESSOR_CELL:
31095 return (reload_completed ? 8 : 0);
31102 /* We are choosing insn from the ready queue. Return zero if INSN can be
/* Lookahead guard hook: on Cell, reject non-pipelined and microcoded
   insns from speculative lookahead positions (all positions but 0).  */
31105 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31107 if (ready_index == 0)
31110 if (rs6000_tune != PROCESSOR_CELL)
31113 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31115 if (!reload_completed
31116 || is_nonpipeline_insn (insn)
31117 || is_microcoded_insn (insn))
31123 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31124 and return true. */
/* Recursive walk over the rtx PAT; the first MEM found wins.  */
31127 find_mem_ref (rtx pat, rtx *mem_ref)
31132 /* stack_tie does not produce any real memory traffic. */
31133 if (tie_operand (pat, VOIDmode))
31142 /* Recursively process the pattern. */
31143 fmt = GET_RTX_FORMAT (GET_CODE (pat))
31145 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
/* 'e' operands are single sub-expressions ...  */
31149 if (find_mem_ref (XEXP (pat, i), mem_ref))
/* ... 'E' operands are rtx vectors.  */
31152 else if (fmt[i] == 'E')
31153 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31155 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31163 /* Determine if PAT is a PATTERN of a load insn. */
/* A load has a MEM somewhere in the source of a SET; PARALLELs are
   searched element-wise.  *LOAD_MEM receives the MEM found.  */
31166 is_load_insn1 (rtx pat, rtx *load_mem)
31168 if (!pat || pat == NULL_RTX)
31171 if (GET_CODE (pat) == SET)
31172 return find_mem_ref (SET_SRC (pat), load_mem);
31174 if (GET_CODE (pat) == PARALLEL)
31178 for (i = 0; i < XVECLEN (pat, 0); i++)
31179 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31186 /* Determine if INSN loads from memory. */
/* Thin wrapper: guard against non-insns, then test the PATTERN.  */
31189 is_load_insn (rtx insn, rtx *load_mem)
31191 if (!insn || !INSN_P (insn))
31197 return is_load_insn1 (PATTERN (insn), load_mem);
31200 /* Determine if PAT is a PATTERN of a store insn. */
/* Mirror of is_load_insn1, but looks for a MEM in the SET destination.
   *STR_MEM receives the MEM found.  */
31203 is_store_insn1 (rtx pat, rtx *str_mem)
31205 if (!pat || pat == NULL_RTX)
31208 if (GET_CODE (pat) == SET)
31209 return find_mem_ref (SET_DEST (pat), str_mem);
31211 if (GET_CODE (pat) == PARALLEL)
31215 for (i = 0; i < XVECLEN (pat, 0); i++)
31216 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31223 /* Determine if INSN stores to memory. */
/* Thin wrapper: guard against non-insns, then test the PATTERN.  */
31226 is_store_insn (rtx insn, rtx *str_mem)
31228 if (!insn || !INSN_P (insn))
31231 return is_store_insn1 (PATTERN (insn), str_mem);
31234 /* Return whether TYPE is a Power9 pairable vector instruction type. */
/* Used by power9_sched_reorder2 when pairing vector / vecload insns.
   NOTE(review): elided listing — additional case labels and the returns
   are missing from this view.  */
31237 is_power9_pairable_vec_type (enum attr_type type)
31241 case TYPE_VECSIMPLE:
31242 case TYPE_VECCOMPLEX:
31246 case TYPE_VECFLOAT:
31248 case TYPE_VECDOUBLE:
31256 /* Returns whether the dependence between INSN and NEXT is considered
31257 costly by the given target. */
/* TARGET_SCHED_IS_COSTLY_DEPENDENCE hook, governed by the
   -msched-costly-dep setting (rs6000_sched_costly_dep).  */
31260 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31264 rtx load_mem, str_mem;
31266 /* If the flag is not enabled - no dependence is considered costly;
31267 allow all dependent insns in the same group.
31268 This is the most aggressive option. */
31269 if (rs6000_sched_costly_dep == no_dep_costly)
31272 /* If the flag is set to 1 - a dependence is always considered costly;
31273 do not allow dependent instructions in the same group.
31274 This is the most conservative option. */
31275 if (rs6000_sched_costly_dep == all_deps_costly)
31278 insn = DEP_PRO (dep);
31279 next = DEP_CON (dep);
31281 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31282 && is_load_insn (next, &load_mem)
31283 && is_store_insn (insn, &str_mem))
31284 /* Prevent load after store in the same group. */
/* Stricter variant: only a *true* store-to-load dependence on
   overlapping memory counts as costly.  */
31287 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31288 && is_load_insn (next, &load_mem)
31289 && is_store_insn (insn, &str_mem)
31290 && DEP_TYPE (dep) == REG_DEP_TRUE
31291 && mem_locations_overlap(str_mem, load_mem))
31292 /* Prevent load after store in the same group if it is a true
31296 /* The flag is set to X; dependences with latency >= X are considered costly,
31297 and will not be scheduled in the same group. */
31298 if (rs6000_sched_costly_dep <= max_dep_latency
31299 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31305 /* Return the next insn after INSN that is found before TAIL is reached,
31306 skipping any "non-active" insns - insns that will not actually occupy
31307 an issue slot. Return NULL_RTX if such an insn is not found. */
31310 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31312 if (insn == NULL_RTX || insn == tail)
/* Walk forward until an active insn (debug insn, jump, jump table, or
   a real insn that is not USE/CLOBBER/stack_tie) or TAIL is reached.  */
31317 insn = NEXT_INSN (insn);
31318 if (insn == NULL_RTX || insn == tail)
31322 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31323 || (NONJUMP_INSN_P (insn)
31324 && GET_CODE (PATTERN (insn)) != USE
31325 && GET_CODE (PATTERN (insn)) != CLOBBER
31326 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31332 /* Do Power9 specific sched_reorder2 reordering of ready list. */
/* READY is the scheduler's ready list (best candidate at READY[LASTPOS]);
   insns are moved to the end of the list so they are chosen next.
   Returns the (cached) number of remaining issue slots.  */
31335 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31340 enum attr_type type, type2;
31342 type = get_attr_type (last_scheduled_insn);
31344 /* Try to issue fixed point divides back-to-back in pairs so they will be
31345 routed to separate execution units and execute in parallel. */
31346 if (type == TYPE_DIV && divide_cnt == 0)
31348 /* First divide has been scheduled. */
31351 /* Scan the ready list looking for another divide, if found move it
31352 to the end of the list so it is chosen next. */
31356 if (recog_memoized (ready[pos]) >= 0
31357 && get_attr_type (ready[pos]) == TYPE_DIV)
31360 for (i = pos; i < lastpos; i++)
31361 ready[i] = ready[i + 1];
31362 ready[lastpos] = tmp;
31370 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31373 /* The best dispatch throughput for vector and vector load insns can be
31374 achieved by interleaving a vector and vector load such that they'll
31375 dispatch to the same superslice. If this pairing cannot be achieved
31376 then it is best to pair vector insns together and vector load insns
31379 To aid in this pairing, vec_pairing maintains the current state with
31380 the following values:
31382 0 : Initial state, no vecload/vector pairing has been started.
31384 1 : A vecload or vector insn has been issued and a candidate for
31385 pairing has been found and moved to the end of the ready
31387 if (type == TYPE_VECLOAD)
31389 /* Issued a vecload. */
31390 if (vec_pairing == 0)
31392 int vecload_pos = -1;
31393 /* We issued a single vecload, look for a vector insn to pair it
31394 with. If one isn't found, try to pair another vecload. */
31398 if (recog_memoized (ready[pos]) >= 0)
31400 type2 = get_attr_type (ready[pos]);
31401 if (is_power9_pairable_vec_type (type2))
31403 /* Found a vector insn to pair with, move it to the
31404 end of the ready list so it is scheduled next. */
31406 for (i = pos; i < lastpos; i++)
31407 ready[i] = ready[i + 1];
31408 ready[lastpos] = tmp;
31410 return cached_can_issue_more;
31412 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31413 /* Remember position of first vecload seen. */
31418 if (vecload_pos >= 0)
31420 /* Didn't find a vector to pair with but did find a vecload,
31421 move it to the end of the ready list. */
31422 tmp = ready[vecload_pos];
31423 for (i = vecload_pos; i < lastpos; i++)
31424 ready[i] = ready[i + 1];
31425 ready[lastpos] = tmp;
31427 return cached_can_issue_more;
31431 else if (is_power9_pairable_vec_type (type))
31433 /* Issued a vector operation. */
31434 if (vec_pairing == 0)
31437 /* We issued a single vector insn, look for a vecload to pair it
31438 with. If one isn't found, try to pair another vector. */
31442 if (recog_memoized (ready[pos]) >= 0)
31444 type2 = get_attr_type (ready[pos]);
31445 if (type2 == TYPE_VECLOAD)
31447 /* Found a vecload insn to pair with, move it to the
31448 end of the ready list so it is scheduled next. */
31450 for (i = pos; i < lastpos; i++)
31451 ready[i] = ready[i + 1];
31452 ready[lastpos] = tmp;
31454 return cached_can_issue_more;
31456 else if (is_power9_pairable_vec_type (type2)
31458 /* Remember position of first vector insn seen. */
31465 /* Didn't find a vecload to pair with but did find a vector
31466 insn, move it to the end of the ready list. */
31467 tmp = ready[vec_pos];
31468 for (i = vec_pos; i < lastpos; i++)
31469 ready[i] = ready[i + 1];
31470 ready[lastpos] = tmp;
31472 return cached_can_issue_more;
31477 /* We've either finished a vec/vecload pair, couldn't find an insn to
31478 continue the current pair, or the last insn had nothing to do
31479 with pairing. In any case, reset the state. */
31483 return cached_can_issue_more;
31486 /* We are about to begin issuing insns for this clock cycle. */
/* TARGET_SCHED_REORDER hook: adjusts the ready list at the start of a
   cycle and returns the issue rate for this cycle.  */
31489 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31490 rtx_insn **ready ATTRIBUTE_UNUSED,
31491 int *pn_ready ATTRIBUTE_UNUSED,
31492 int clock_var ATTRIBUTE_UNUSED)
31494 int n_ready = *pn_ready;
31497 fprintf (dump, "// rs6000_sched_reorder :\n");
31499 /* Reorder the ready list, if the second to last ready insn
31500 is a non-pipelined insn. */
31501 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
31503 if (is_nonpipeline_insn (ready[n_ready - 1])
31504 && (recog_memoized (ready[n_ready - 2]) > 0))
31505 /* Simply swap first two insns. */
31506 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
/* Power6 rebalances its load/store pendulum every cycle; see
   rs6000_sched_reorder2 for the sign convention.  */
31509 if (rs6000_tune == PROCESSOR_POWER6)
31510 load_store_pendulum = 0;
31512 return rs6000_issue_rate ();
31515 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31518 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31519 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31522 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31524 /* For Power6, we need to handle some special cases to try and keep the
31525 store queue from overflowing and triggering expensive flushes.
31527 This code monitors how load and store instructions are being issued
31528 and skews the ready list one way or the other to increase the likelihood
31529 that a desired instruction is issued at the proper time.
31531 A couple of things are done. First, we maintain a "load_store_pendulum"
31532 to track the current state of load/store issue.
31534 - If the pendulum is at zero, then no loads or stores have been
31535 issued in the current cycle so we do nothing.
31537 - If the pendulum is 1, then a single load has been issued in this
31538 cycle and we attempt to locate another load in the ready list to
31541 - If the pendulum is -2, then two stores have already been
31542 issued in this cycle, so we increase the priority of the first load
31543 in the ready list to increase it's likelihood of being chosen first
31546 - If the pendulum is -1, then a single store has been issued in this
31547 cycle and we attempt to locate another store in the ready list to
31548 issue with it, preferring a store to an adjacent memory location to
31549 facilitate store pairing in the store queue.
31551 - If the pendulum is 2, then two loads have already been
31552 issued in this cycle, so we increase the priority of the first store
31553 in the ready list to increase it's likelihood of being chosen first
31556 - If the pendulum < -2 or > 2, then do nothing.
31558 Note: This code covers the most common scenarios. There exist non
31559 load/store instructions which make use of the LSU and which
31560 would need to be accounted for to strictly model the behavior
31561 of the machine. Those instructions are currently unaccounted
31562 for to help minimize compile time overhead of this code.
31564 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
31569 rtx load_mem, str_mem;
31571 if (is_store_insn (last_scheduled_insn, &str_mem))
31572 /* Issuing a store, swing the load_store_pendulum to the left */
31573 load_store_pendulum--;
31574 else if (is_load_insn (last_scheduled_insn, &load_mem))
31575 /* Issuing a load, swing the load_store_pendulum to the right */
31576 load_store_pendulum++;
31578 return cached_can_issue_more;
31580 /* If the pendulum is balanced, or there is only one instruction on
31581 the ready list, then all is well, so return. */
31582 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31583 return cached_can_issue_more;
31585 if (load_store_pendulum == 1)
31587 /* A load has been issued in this cycle. Scan the ready list
31588 for another load to issue with it */
31593 if (is_load_insn (ready[pos], &load_mem))
31595 /* Found a load. Move it to the head of the ready list,
31596 and adjust it's priority so that it is more likely to
31599 for (i=pos; i<*pn_ready-1; i++)
31600 ready[i] = ready[i + 1];
31601 ready[*pn_ready-1] = tmp;
31603 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31604 INSN_PRIORITY (tmp)++;
31610 else if (load_store_pendulum == -2)
31612 /* Two stores have been issued in this cycle. Increase the
31613 priority of the first load in the ready list to favor it for
31614 issuing in the next cycle. */
31619 if (is_load_insn (ready[pos], &load_mem)
31621 && INSN_PRIORITY_KNOWN (ready[pos]))
31623 INSN_PRIORITY (ready[pos])++;
31625 /* Adjust the pendulum to account for the fact that a load
31626 was found and increased in priority. This is to prevent
31627 increasing the priority of multiple loads */
31628 load_store_pendulum--;
31635 else if (load_store_pendulum == -1)
31637 /* A store has been issued in this cycle. Scan the ready list for
31638 another store to issue with it, preferring a store to an adjacent
31640 int first_store_pos = -1;
31646 if (is_store_insn (ready[pos], &str_mem))
31649 /* Maintain the index of the first store found on the
31651 if (first_store_pos == -1)
31652 first_store_pos = pos;
31654 if (is_store_insn (last_scheduled_insn, &str_mem2)
31655 && adjacent_mem_locations (str_mem, str_mem2))
31657 /* Found an adjacent store. Move it to the head of the
31658 ready list, and adjust its priority so that it is
31659 more likely to stay there */
31661 for (i=pos; i<*pn_ready-1; i++)
31662 ready[i] = ready[i + 1];
31663 ready[*pn_ready-1] = tmp;
31665 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31666 INSN_PRIORITY (tmp)++;
31668 first_store_pos = -1;
31676 if (first_store_pos >= 0)
31678 /* An adjacent store wasn't found, but a non-adjacent store was,
31679 so move the non-adjacent store to the front of the ready
31680 list, and adjust its priority so that it is more likely to
31682 tmp = ready[first_store_pos];
31683 for (i=first_store_pos; i<*pn_ready-1; i++)
31684 ready[i] = ready[i + 1];
31685 ready[*pn_ready-1] = tmp;
31686 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31687 INSN_PRIORITY (tmp)++;
31690 else if (load_store_pendulum == 2)
31692 /* Two loads have been issued in this cycle. Increase the priority
31693 of the first store in the ready list to favor it for issuing in
31699 if (is_store_insn (ready[pos], &str_mem)
31701 && INSN_PRIORITY_KNOWN (ready[pos]))
31703 INSN_PRIORITY (ready[pos])++;
31705 /* Adjust the pendulum to account for the fact that a store
31706 was found and increased in priority. This is to prevent
31707 increasing the priority of multiple stores */
31708 load_store_pendulum++;
31717 /* Do Power9 dependent reordering if necessary. */
31718 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31719 && recog_memoized (last_scheduled_insn) >= 0)
31720 return power9_sched_reorder2 (ready, *pn_ready - 1);
31722 return cached_can_issue_more;
31725 /* Return whether the presence of INSN causes a dispatch group termination
31726 of group WHICH_GROUP.
31728 If WHICH_GROUP == current_group, this function will return true if INSN
31729 causes the termination of the current group (i.e., the dispatch group to
31730 which INSN belongs). This means that INSN will be the last insn in the
31731 group it belongs to.
31733 If WHICH_GROUP == previous_group, this function will return true if INSN
31734 causes the termination of the previous group (i.e., the dispatch group that
31735 precedes the group to which INSN belongs). This means that INSN will be
31736 the first insn in the group it belongs to). */
31739 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
/* Classify INSN once with both boundary predicates, then answer for the
   group the caller asked about.  */
31746 first = insn_must_be_first_in_group (insn);
31747 last = insn_must_be_last_in_group (insn);
/* "Must be last" ends INSN's own (current) group; "must be first" ends
   the group that precedes INSN.  */
31752 if (which_group == current_group)
31754 else if (which_group == previous_group)
/* Return true if INSN is required to begin a new dispatch group on the
   processor selected by rs6000_tune.  */
31762 insn_must_be_first_in_group (rtx_insn *insn)
31764 enum attr_type type;
/* Notes, debug insns, USEs and CLOBBERs never constrain grouping.  */
31768 || DEBUG_INSN_P (insn)
31769 || GET_CODE (PATTERN (insn)) == USE
31770 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* The group-start constraints are processor specific.  */
31773 switch (rs6000_tune)
31775 case PROCESSOR_POWER5:
31776 if (is_cracked_insn (insn))
31779 case PROCESSOR_POWER4:
31780 if (is_microcoded_insn (insn))
31783 if (!rs6000_sched_groups)
/* For POWER4/5 the remaining decision is keyed off the insn's type
   attribute.  */
31786 type = get_attr_type (insn);
31793 case TYPE_CR_LOGICAL:
31806 case PROCESSOR_POWER6:
31807 type = get_attr_type (insn);
31816 case TYPE_FPCOMPARE:
31827 if (get_attr_dot (insn) == DOT_NO
31828 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31833 if (get_attr_size (insn) == SIZE_32)
31841 if (get_attr_update (insn) == UPDATE_YES)
31849 case PROCESSOR_POWER7:
31850 type = get_attr_type (insn);
31854 case TYPE_CR_LOGICAL:
31868 if (get_attr_dot (insn) == DOT_YES)
31873 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31874 || get_attr_update (insn) == UPDATE_YES)
31881 if (get_attr_update (insn) == UPDATE_YES)
31889 case PROCESSOR_POWER8:
31890 type = get_attr_type (insn);
31894 case TYPE_CR_LOGICAL:
31902 case TYPE_VECSTORE:
31909 if (get_attr_dot (insn) == DOT_YES)
31914 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31915 || get_attr_update (insn) == UPDATE_YES)
31920 if (get_attr_update (insn) == UPDATE_YES
31921 && get_attr_indexed (insn) == INDEXED_YES)
/* Return true if INSN is required to be the final insn of its dispatch
   group on the processor selected by rs6000_tune.  */
31937 insn_must_be_last_in_group (rtx_insn *insn)
31939 enum attr_type type;
/* Notes, debug insns, USEs and CLOBBERs never constrain grouping.  */
31943 || DEBUG_INSN_P (insn)
31944 || GET_CODE (PATTERN (insn)) == USE
31945 || GET_CODE (PATTERN (insn)) == CLOBBER)
31948 switch (rs6000_tune) {
31949 case PROCESSOR_POWER4:
31950 case PROCESSOR_POWER5:
31951 if (is_microcoded_insn (insn))
31954 if (is_branch_slot_insn (insn))
31958 case PROCESSOR_POWER6:
31959 type = get_attr_type (insn);
31967 case TYPE_FPCOMPARE:
31978 if (get_attr_dot (insn) == DOT_NO
31979 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31984 if (get_attr_size (insn) == SIZE_32)
31992 case PROCESSOR_POWER7:
31993 type = get_attr_type (insn);
32003 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32004 && get_attr_update (insn) == UPDATE_YES)
32009 if (get_attr_update (insn) == UPDATE_YES
32010 && get_attr_indexed (insn) == INDEXED_YES)
32018 case PROCESSOR_POWER8:
32019 type = get_attr_type (insn);
32031 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32032 && get_attr_update (insn) == UPDATE_YES)
32037 if (get_attr_update (insn) == UPDATE_YES
32038 && get_attr_indexed (insn) == INDEXED_YES)
32053 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32054 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32057 is_costly_group (rtx *group_insns, rtx next_insn)
32060 int issue_rate = rs6000_issue_rate ();
/* GROUP_INSNS holds at most one insn per issue slot of the current group.  */
32062 for (i = 0; i < issue_rate; i++)
32064 sd_iterator_def sd_it;
32066 rtx insn = group_insns[i];
/* Walk each group member's forward resolved dependences and look for a
   costly dependence that reaches NEXT_INSN.  */
32071 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32073 rtx next = DEP_CON (dep);
32075 if (next == next_insn
32076 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32084 /* Utility of the function redefine_groups.
32085 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32086 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32087 to keep it "far" (in a separate group) from GROUP_INSNS, following
32088 one of the following schemes, depending on the value of the flag
32089 -minsert_sched_nops = X:
32090 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32091 in order to force NEXT_INSN into a separate group.
32092 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32093 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32094 insertion (has a group just ended, how many vacant issue slots remain in the
32095 last group, and how many dispatch groups were encountered so far). */
32098 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32099 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32104 int issue_rate = rs6000_issue_rate ();
32105 bool end = *group_end;
/* Nothing to separate at the end of the sequence or before debug insns.  */
32108 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32109 return can_issue_more;
32111 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32112 return can_issue_more;
/* Nops are only needed when NEXT_INSN has a costly dependence on the
   insns already placed in the current group.  */
32114 force = is_costly_group (group_insns, next_insn);
32116 return can_issue_more;
32118 if (sched_verbose > 6)
32119 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
32120 *group_count ,can_issue_more);
/* Scheme (1): emit exactly enough nops to close the current group.  */
32122 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32125 can_issue_more = 0;
32127 /* Since only a branch can be issued in the last issue_slot, it is
32128 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32129 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32130 in this case the last nop will start a new group and the branch
32131 will be forced to the new group. */
32132 if (can_issue_more && !is_branch_slot_insn (next_insn))
32135 /* Do we have a special group ending nop? */
32136 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
32137 || rs6000_tune == PROCESSOR_POWER8)
32139 nop = gen_group_ending_nop ();
32140 emit_insn_before (nop, next_insn);
32141 can_issue_more = 0;
32144 while (can_issue_more > 0)
32147 emit_insn_before (nop, next_insn);
/* Scheme (2): emit a fixed number of nops (rs6000_sched_insert_nops),
   tracking how they fill the remaining issue slots.  */
32155 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32157 int n_nops = rs6000_sched_insert_nops;
32159 /* Nops can't be issued from the branch slot, so the effective
32160 issue_rate for nops is 'issue_rate - 1'. */
32161 if (can_issue_more == 0)
32162 can_issue_more = issue_rate;
32164 if (can_issue_more == 0)
32166 can_issue_more = issue_rate - 1;
32169 for (i = 0; i < issue_rate; i++)
32171 group_insns[i] = 0;
32178 emit_insn_before (nop, next_insn);
32179 if (can_issue_more == issue_rate - 1) /* new group begins */
32182 if (can_issue_more == 0)
32184 can_issue_more = issue_rate - 1;
32187 for (i = 0; i < issue_rate; i++)
32189 group_insns[i] = 0;
32195 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32198 /* Is next_insn going to start a new group? */
32201 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32202 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32203 || (can_issue_more < issue_rate &&
32204 insn_terminates_group_p (next_insn, previous_group)));
32205 if (*group_end && end)
32208 if (sched_verbose > 6)
32209 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32210 *group_count, can_issue_more);
32211 return can_issue_more;
32214 return can_issue_more;
32217 /* This function tries to synch the dispatch groups that the compiler "sees"
32218 with the dispatch groups that the processor dispatcher is expected to
32219 form in practice. It tries to achieve this synchronization by forcing the
32220 estimated processor grouping on the compiler (as opposed to the function
32221 'pad_groups' which tries to force the scheduler's grouping on the processor).
32223 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32224 examines the (estimated) dispatch groups that will be formed by the processor
32225 dispatcher. It marks these group boundaries to reflect the estimated
32226 processor grouping, overriding the grouping that the scheduler had marked.
32227 Depending on the value of the flag '-minsert-sched-nops' this function can
32228 force certain insns into separate groups or force a certain distance between
32229 them by inserting nops, for example, if there exists a "costly dependence"
32232 The function estimates the group boundaries that the processor will form as
32233 follows: It keeps track of how many vacant issue slots are available after
32234 each insn. A subsequent insn will start a new group if one of the following
32236 - no more vacant issue slots remain in the current dispatch group.
32237 - only the last issue slot, which is the branch slot, is vacant, but the next
32238 insn is not a branch.
32239 - only the last 2 or less issue slots, including the branch slot, are vacant,
32240 which means that a cracked insn (which occupies two issue slots) can't be
32241 issued in this group.
32242 - less than 'issue_rate' slots are vacant, and the next insn always needs to
32243 start a new group. */
32246 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32249 rtx_insn *insn, *next_insn;
32251 int can_issue_more;
32254 int group_count = 0;
32258 issue_rate = rs6000_issue_rate ();
32259 group_insns = XALLOCAVEC (rtx, issue_rate);
32260 for (i = 0; i < issue_rate; i++)
32262 group_insns[i] = 0;
32264 can_issue_more = issue_rate;
32266 insn = get_next_active_insn (prev_head_insn, tail);
32269 while (insn != NULL_RTX)
/* Record INSN in the next vacant issue slot of the estimated group.  */
32271 slot = (issue_rate - can_issue_more);
32272 group_insns[slot] = insn;
32274 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32275 if (insn_terminates_group_p (insn, current_group))
32276 can_issue_more = 0;
32278 next_insn = get_next_active_insn (insn, tail);
32279 if (next_insn == NULL_RTX)
32280 return group_count + 1;
32282 /* Is next_insn going to start a new group? */
32284 = (can_issue_more == 0
32285 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32286 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32287 || (can_issue_more < issue_rate &&
32288 insn_terminates_group_p (next_insn, previous_group)));
/* Possibly insert nops to separate NEXT_INSN from a costly group.  */
32290 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32291 next_insn, &group_end, can_issue_more,
32297 can_issue_more = 0;
32298 for (i = 0; i < issue_rate; i++)
32300 group_insns[i] = 0;
/* Keep the scheduler's TImode group-start marking on NEXT_INSN in sync
   with the group boundary we just computed.  */
32304 if (GET_MODE (next_insn) == TImode && can_issue_more)
32305 PUT_MODE (next_insn, VOIDmode);
32306 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32307 PUT_MODE (next_insn, TImode);
32310 if (can_issue_more == 0)
32311 can_issue_more = issue_rate;
32314 return group_count;
32317 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32318 dispatch group boundaries that the scheduler had marked. Pad with nops
32319 any dispatch groups which have vacant issue slots, in order to force the
32320 scheduler's grouping on the processor dispatcher. The function
32321 returns the number of dispatch groups found. */
32324 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32327 rtx_insn *insn, *next_insn;
32330 int can_issue_more;
32332 int group_count = 0;
32334 /* Initialize issue_rate. */
32335 issue_rate = rs6000_issue_rate ();
32336 can_issue_more = issue_rate;
32338 insn = get_next_active_insn (prev_head_insn, tail);
32339 next_insn = get_next_active_insn (insn, tail);
32341 while (insn != NULL_RTX)
32344 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
/* A TImode marking on NEXT_INSN means the scheduler started a new group
   there; end of sequence also ends the group.  */
32346 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32348 if (next_insn == NULL_RTX)
32353 /* If the scheduler had marked group termination at this location
32354 (between insn and next_insn), and neither insn nor next_insn will
32355 force group termination, pad the group with nops to force group
32358 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32359 && !insn_terminates_group_p (insn, current_group)
32360 && !insn_terminates_group_p (next_insn, previous_group))
32362 if (!is_branch_slot_insn (next_insn))
/* Fill every remaining vacant slot of this group with a nop.  */
32365 while (can_issue_more)
32368 emit_insn_before (nop, next_insn);
32373 can_issue_more = issue_rate;
32378 next_insn = get_next_active_insn (insn, tail);
32381 return group_count;
32384 /* We're beginning a new block. Initialize data structures as necessary. */
32387 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32388 int sched_verbose ATTRIBUTE_UNUSED,
32389 int max_ready ATTRIBUTE_UNUSED)
/* Reset the per-block globals consulted by the reorder hooks above.  */
32391 last_scheduled_insn = NULL;
32392 load_store_pendulum = 0;
32397 /* The following function is called at the end of scheduling BB.
32398 After reload, it inserts nops at insn group bundling. */
32401 rs6000_sched_finish (FILE *dump, int sched_verbose)
32406 fprintf (dump, "=== Finishing schedule.\n");
/* Group bundling only applies after reload on dispatch-group targets.  */
32408 if (reload_completed && rs6000_sched_groups)
32410 /* Do not run sched_finish hook when selective scheduling enabled. */
32411 if (sel_sched_p ())
32414 if (rs6000_sched_insert_nops == sched_finish_none)
/* Either pad the scheduler's marked groups with nops, or re-estimate
   the processor's grouping (which may itself insert nops).  */
32417 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32418 n_groups = pad_groups (dump, sched_verbose,
32419 current_sched_info->prev_head,
32420 current_sched_info->next_tail);
32422 n_groups = redefine_groups (dump, sched_verbose,
32423 current_sched_info->prev_head,
32424 current_sched_info->next_tail);
32426 if (sched_verbose >= 6)
32428 fprintf (dump, "ngroups = %d\n", n_groups);
32429 print_rtl (dump, current_sched_info->prev_head);
32430 fprintf (dump, "Done finish_sched\n");
/* Snapshot of the global scheduling state, saved/restored by the
   selective-scheduling context hooks below.  */
32435 struct rs6000_sched_context
/* Issue slots still available in the current cycle.  */
32437 short cached_can_issue_more;
/* Most recently scheduled insn, or NULL at block start.  */
32438 rtx_insn *last_scheduled_insn;
/* POWER6 load/store balance counter (see the reorder logic above).  */
32439 int load_store_pendulum;
32444 typedef struct rs6000_sched_context rs6000_sched_context_def;
32445 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32447 /* Allocate store for new scheduling context. */
32449 rs6000_alloc_sched_context (void)
/* Caller owns the block; it is released via rs6000_free_sched_context.  */
32451 return xmalloc (sizeof (rs6000_sched_context_def));
32454 /* If CLEAN_P is true then initializes _SC with clean data,
32455 and from the global context otherwise. */
32457 rs6000_init_sched_context (void *_sc, bool clean_p)
32459 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
/* Clean start: zero all per-block scheduling state.  */
32463 sc->cached_can_issue_more = 0;
32464 sc->last_scheduled_insn = NULL;
32465 sc->load_store_pendulum = 0;
32466 sc->divide_cnt = 0;
32467 sc->vec_pairing = 0;
/* Otherwise snapshot the current global scheduling state.  */
32471 sc->cached_can_issue_more = cached_can_issue_more;
32472 sc->last_scheduled_insn = last_scheduled_insn;
32473 sc->load_store_pendulum = load_store_pendulum;
32474 sc->divide_cnt = divide_cnt;
32475 sc->vec_pairing = vec_pairing;
32479 /* Sets the global scheduling context to the one pointed to by _SC. */
32481 rs6000_set_sched_context (void *_sc)
32483 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32485 gcc_assert (sc != NULL);
/* Restore every field saved by rs6000_init_sched_context.  */
32487 cached_can_issue_more = sc->cached_can_issue_more;
32488 last_scheduled_insn = sc->last_scheduled_insn;
32489 load_store_pendulum = sc->load_store_pendulum;
32490 divide_cnt = sc->divide_cnt;
32491 vec_pairing = sc->vec_pairing;
/* Release a context obtained from rs6000_alloc_sched_context.  */
32496 rs6000_free_sched_context (void *_sc)
32498 gcc_assert (_sc != NULL);
/* Decide, keyed off INSN's type attribute, whether the scheduler may
   speculate INSN (TARGET_SCHED_CAN_SPECULATE_INSN hook).  */
32504 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32506 switch (get_attr_type (insn))
32521 /* Length in units of the trampoline for entering a nested function. */
32524 rs6000_trampoline_size (void)
/* Size depends on the ABI and on 32- vs 64-bit pointers.  */
32528 switch (DEFAULT_ABI)
32531 gcc_unreachable ();
/* Three pointer-size words (cf. the AIX function descriptor built in
   rs6000_trampoline_init).  */
32534 ret = (TARGET_32BIT) ? 12 : 24;
32538 gcc_assert (!TARGET_32BIT);
32544 ret = (TARGET_32BIT) ? 40 : 48;
32551 /* Emit RTL insns to initialize the variable parts of a trampoline.
32552 FNADDR is an RTX for the address of the function's pure code.
32553 CXT is an RTX for the static chain value for the function. */
32556 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32558 int regsize = (TARGET_32BIT) ? 4 : 8;
32559 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32560 rtx ctx_reg = force_reg (Pmode, cxt);
32561 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32563 switch (DEFAULT_ABI)
32566 gcc_unreachable ();
32568 /* Under AIX, just build the 3 word function descriptor */
32571 rtx fnmem, fn_reg, toc_reg;
32573 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32574 error ("you cannot take the address of a nested function if you use "
32575 "the %qs option", "-mno-pointers-to-nested-functions")
32577 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32578 fn_reg = gen_reg_rtx (Pmode);
32579 toc_reg = gen_reg_rtx (Pmode);
32581 /* Macro to shorten the code expansions below. */
32582 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32584 m_tramp = replace_equiv_address (m_tramp, addr);
/* Copy code address and TOC from FNADDR's descriptor, then store
   code, TOC, and static chain into the trampoline descriptor.  */
32586 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32587 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32588 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32589 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32590 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32596 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32600 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32601 LCT_NORMAL, VOIDmode,
32603 GEN_INT (rs6000_trampoline_size ()), SImode,
32611 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32612 identifier as an argument, so the front end shouldn't look it up. */
32615 rs6000_attribute_takes_identifier_p (const_tree attr_id)
/* Only "altivec" (e.g. __attribute__((altivec(vector__)))) qualifies.  */
32617 return is_attribute_p ("altivec", attr_id);
32620 /* Handle the "altivec" attribute. The attribute may have
32621 arguments as follows:
32623 __attribute__((altivec(vector__)))
32624 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32625 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32627 and may appear more than once (e.g., 'vector bool char') in a
32628 given declaration. */
32631 rs6000_handle_altivec_attribute (tree *node,
32632 tree name ATTRIBUTE_UNUSED,
32634 int flags ATTRIBUTE_UNUSED,
32635 bool *no_add_attrs)
32637 tree type = *node, result = NULL_TREE;
/* The first character of the identifier argument ('v', 'b', or 'p')
   selects the flavor below.  */
32641 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32642 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32643 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
/* Strip pointer, function, method and array layers to reach the element
   type the attribute applies to.  */
32646 while (POINTER_TYPE_P (type)
32647 || TREE_CODE (type) == FUNCTION_TYPE
32648 || TREE_CODE (type) == METHOD_TYPE
32649 || TREE_CODE (type) == ARRAY_TYPE)
32650 type = TREE_TYPE (type);
32652 mode = TYPE_MODE (type);
32654 /* Check for invalid AltiVec type qualifiers. */
32655 if (type == long_double_type_node)
32656 error ("use of %<long double%> in AltiVec types is invalid");
32657 else if (type == boolean_type_node)
32658 error ("use of boolean types in AltiVec types is invalid");
32659 else if (TREE_CODE (type) == COMPLEX_TYPE)
32660 error ("use of %<complex%> in AltiVec types is invalid");
32661 else if (DECIMAL_FLOAT_MODE_P (mode))
32662 error ("use of decimal floating point types in AltiVec types is invalid");
32663 else if (!TARGET_VSX)
32665 if (type == long_unsigned_type_node || type == long_integer_type_node)
32668 error ("use of %<long%> in AltiVec types is invalid for "
32669 "64-bit code without %qs", "-mvsx")
32670 else if (rs6000_warn_altivec_long)
32671 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32674 else if (type == long_long_unsigned_type_node
32675 || type == long_long_integer_type_node)
32676 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32678 else if (type == double_type_node)
32679 error ("use of %<double%> in AltiVec types is invalid without %qs",
/* Map element mode to the corresponding built-in vector type.  */
32683 switch (altivec_type)
32686 unsigned_p = TYPE_UNSIGNED (type);
32690 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32693 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32696 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32699 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32702 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32704 case E_SFmode: result = V4SF_type_node; break;
32705 case E_DFmode: result = V2DF_type_node; break;
32706 /* If the user says 'vector int bool', we may be handed the 'bool'
32707 attribute _before_ the 'vector' attribute, and so select the
32708 proper type in the 'b' case below. */
32709 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32710 case E_V2DImode: case E_V2DFmode:
32718 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32719 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32720 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32721 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32728 case E_V8HImode: result = pixel_V8HI_type_node;
32734 /* Propagate qualifiers attached to the element type
32735 onto the vector type. */
32736 if (result && result != type && TYPE_QUALS (type))
32737 result = build_qualified_type (result, TYPE_QUALS (type));
32739 *no_add_attrs = true; /* No need to hang on to the attribute. */
32742 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32747 /* AltiVec defines five built-in scalar types that serve as vector
32748 elements; we must teach the compiler how to mangle them. The 128-bit
32749 floating point mangling is target-specific as well. */
32751 static const char *
32752 rs6000_mangle_type (const_tree type)
32754 type = TYPE_MAIN_VARIANT (type);
/* Only scalar void/bool/integer/real types can be AltiVec element types;
   everything else falls through to the default mangler.  */
32756 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32757 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
/* Itanium C++ ABI vendor-extended mangling: 'u'/'U' + length + name.  */
32760 if (type == bool_char_type_node) return "U6__boolc";
32761 if (type == bool_short_type_node) return "U6__bools";
32762 if (type == pixel_type_node) return "u7__pixel";
32763 if (type == bool_int_type_node) return "U6__booli";
32764 if (type == bool_long_long_type_node) return "U6__boolx";
32766 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
32768 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
/* Keep the GCC 8.1 mangling when compatibility mode is requested.  */
32769 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
32771 /* For all other types, use the default mangling. */
32775 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32776 struct attribute_spec.handler. */
32779 rs6000_handle_longcall_attribute (tree *node, tree name,
32780 tree args ATTRIBUTE_UNUSED,
32781 int flags ATTRIBUTE_UNUSED,
32782 bool *no_add_attrs)
/* The attribute is only meaningful on function types (or decls that
   carry one); otherwise warn and drop it.  */
32784 if (TREE_CODE (*node) != FUNCTION_TYPE
32785 && TREE_CODE (*node) != FIELD_DECL
32786 && TREE_CODE (*node) != TYPE_DECL)
32788 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32790 *no_add_attrs = true;
32796 /* Set longcall attributes on all functions declared when
32797 rs6000_default_long_calls is true. */
32799 rs6000_set_default_type_attributes (tree type)
/* -mlongcall: prepend "longcall" to every function/method type.  */
32801 if (rs6000_default_long_calls
32802 && (TREE_CODE (type) == FUNCTION_TYPE
32803 || TREE_CODE (type) == METHOD_TYPE))
32804 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32806 TYPE_ATTRIBUTES (type));
/* Darwin layers its own defaults on top.  */
32809 darwin_set_default_type_attributes (type);
32813 /* Return a reference suitable for calling a function with the
32814 longcall attribute. */
32817 rs6000_longcall_ref (rtx call_ref, rtx arg)
32819 /* System V adds '.' to the internal name, so skip them. */
32820 const char *call_name = XSTR (call_ref, 0);
32821 if (*call_name == '.')
32823 while (*call_name == '.')
/* Rebuild the SYMBOL_REF from the stripped name.  */
32826 tree node = get_identifier (call_name);
32827 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32831 && (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4))
32833 rtx base = const0_rtx;
32835 if (DEFAULT_ABI == ABI_ELFv2)
32837 base = gen_rtx_REG (Pmode, TOC_REGISTER);
32843 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32846 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
32847 may be used by a function global entry point. For SysV4, r11
32848 is used by __glink_PLTresolve lazy resolver entry. */
32849 rtx reg = gen_rtx_REG (Pmode, regno);
/* Materialize the PLT address in REG as a high/low UNSPEC pair.  */
32850 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
32852 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
32854 emit_insn (gen_rtx_SET (reg, hi));
32855 emit_insn (gen_rtx_SET (reg, lo));
/* Fallback: an indirect call through a register holding the address.  */
32859 return force_reg (Pmode, call_ref);
32862 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32863 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32866 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32867 struct attribute_spec.handler. */
32869 rs6000_handle_struct_attribute (tree *node, tree name,
32870 tree args ATTRIBUTE_UNUSED,
32871 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* When applied to a decl, redirect to the underlying type.  */
32874 if (DECL_P (*node))
32876 if (TREE_CODE (*node) == TYPE_DECL)
32877 type = &TREE_TYPE (*node);
/* Only record and union types can take these layout attributes.  */
32882 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32883 || TREE_CODE (*type) == UNION_TYPE)))
32885 warning (OPT_Wattributes, "%qE attribute ignored", name);
32886 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
32889 else if ((is_attribute_p ("ms_struct", name)
32890 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32891 || ((is_attribute_p ("gcc_struct", name)
32892 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32894 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32896 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bit-field layout: the target
   default unless "gcc_struct" opts out, or "ms_struct" forces it on.  */
32903 rs6000_ms_bitfield_layout_p (const_tree record_type)
32905 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32906 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32907 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32910 #ifdef USING_ELFOS_H
32912 /* A get_unnamed_section callback, used for switching to toc_section. */
32915 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
/* Minimal-TOC on AIX/ELFv2: emit one real TOC entry pointing at the
   minimal-TOC table, then use the minimal-TOC section from then on.  */
32917 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32918 && TARGET_MINIMAL_TOC)
32920 if (!toc_initialized)
32922 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32923 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32924 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32925 fprintf (asm_out_file, "\t.tc ");
32926 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32927 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32928 fprintf (asm_out_file, "\n");
32930 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32931 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32932 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
/* Bias the anchor by 32768 so 16-bit signed offsets cover the table.  */
32933 fprintf (asm_out_file, " = .+32768\n");
32934 toc_initialized = 1;
32937 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
/* Full TOC on AIX/ELFv2.  */
32939 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32941 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32942 if (!toc_initialized)
32944 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32945 toc_initialized = 1;
/* Other ABIs: minimal-TOC section with the same biased anchor.  */
32950 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32951 if (!toc_initialized)
32953 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32954 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32955 fprintf (asm_out_file, " = .+32768\n");
32956 toc_initialized = 1;
32961 /* Implement TARGET_ASM_INIT_SECTIONS. */
32964 rs6000_elf_asm_init_sections (void)
/* The TOC section uses the lazy-initialization callback above.  */
32967 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
/* .sdata2: writable small-data section.  */
32970 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32971 SDATA2_SECTION_ASM_OP);
32974 /* Implement TARGET_SELECT_RTX_SECTION. */
32977 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32978 unsigned HOST_WIDE_INT align)
/* Constants that qualify for a TOC entry go in the TOC; everything else
   uses the generic ELF constant-pool placement.  */
32980 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32981 return toc_section;
32983 return default_elf_select_rtx_section (mode, x, align);
32986 /* For a SYMBOL_REF, set generic flags and then perform some
32987 target-specific processing.
32989 When the AIX ABI is requested on a non-AIX system, replace the
32990 function name with the real name (with a leading .) rather than the
32991 function descriptor name. This saves a lot of overriding code to
32992 read the prefixes. */
32994 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32996 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32998 default_encode_section_info (decl, rtl, first);
33001 && TREE_CODE (decl) == FUNCTION_DECL
33003 && DEFAULT_ABI == ABI_AIX)
33005 rtx sym_ref = XEXP (rtl, 0);
/* Build ".name": one extra byte for the leading '.', one for the NUL.  */
33006 size_t len = strlen (XSTR (sym_ref, 0));
33007 char *str = XALLOCAVEC (char, len + 2);
33009 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
/* Intern the prefixed name in GC-managed storage.  */
33010 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
/* Return true when SECTION either equals TEMPL exactly or begins with
   TEMPL followed by a '.' — i.e. TEMPL names SECTION itself or a parent
   of a dot-separated subsection.  */
static bool
compare_section_name (const char *section, const char *templ)
{
  size_t prefix_len = strlen (templ);

  if (strncmp (section, templ, prefix_len) != 0)
    return false;

  /* A prefix match only counts at end-of-string or a subsection dot.  */
  return section[prefix_len] == '\0' || section[prefix_len] == '.';
}
/* Return true if DECL should be placed in one of the small data
   sections (.sdata/.sbss and friends).  */
33025 rs6000_elf_in_small_data_p (const_tree decl)
33027 if (rs6000_sdata == SDATA_NONE)
33030 /* We want to merge strings, so we never consider them small data. */
33031 if (TREE_CODE (decl) == STRING_CST)
33034 /* Functions are never in the small data area. */
33035 if (TREE_CODE (decl) == FUNCTION_DECL)
/* Honor an explicit section placement into a small-data section.  */
33038 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
33040 const char *section = DECL_SECTION_NAME (decl);
33041 if (compare_section_name (section, ".sdata")
33042 || compare_section_name (section, ".sdata2")
33043 || compare_section_name (section, ".gnu.linkonce.s")
33044 || compare_section_name (section, ".sbss")
33045 || compare_section_name (section, ".sbss2")
33046 || compare_section_name (section, ".gnu.linkonce.sb")
33047 || strcmp (section, ".PPC.EMB.sdata0") == 0
33048 || strcmp (section, ".PPC.EMB.sbss0") == 0)
33053 /* If we are told not to put readonly data in sdata, then don't. */
33054 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
33055 && !rs6000_readonly_in_sdata)
/* Otherwise the -G size threshold decides.  */
33058 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
33061 && size <= g_switch_value
33062 /* If it's not public, and we're not going to reference it there,
33063 there's no need to put it in the small data section. */
33064 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33071 #endif /* USING_ELFOS_H */
33073 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33076 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33078 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33081 /* Do not place thread-local symbols refs in the object blocks. */
33084 rs6000_use_blocks_for_decl_p (const_tree decl)
33086 return !DECL_THREAD_LOCAL_P (decl);
33089 /* Return a REG that occurs in ADDR with coefficient 1.
33090 ADDR can be effectively incremented by incrementing REG.
33092 r0 is special and we must not select it as an address
33093 register by this routine since our caller will try to
33094 increment the returned register via an "la" instruction. */
33097 find_addr_reg (rtx addr)
33099 while (GET_CODE (addr) == PLUS)
33101 if (REG_P (XEXP (addr, 0))
33102 && REGNO (XEXP (addr, 0)) != 0)
33103 addr = XEXP (addr, 0);
33104 else if (REG_P (XEXP (addr, 1))
33105 && REGNO (XEXP (addr, 1)) != 0)
33106 addr = XEXP (addr, 1);
33107 else if (CONSTANT_P (XEXP (addr, 0)))
33108 addr = XEXP (addr, 1);
33109 else if (CONSTANT_P (XEXP (addr, 1)))
33110 addr = XEXP (addr, 0);
33112 gcc_unreachable ();
33114 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
33119 rs6000_fatal_bad_address (rtx op)
33121 fatal_insn ("bad address", op);
33126 typedef struct branch_island_d {
33127 tree function_name;
33133 static vec<branch_island, va_gc> *branch_islands;
33135 /* Remember to generate a branch island for far calls to the given
33139 add_compiler_branch_island (tree label_name, tree function_name,
33142 branch_island bi = {function_name, label_name, line_number};
33143 vec_safe_push (branch_islands, bi);
/* Generate far-jump branch islands for everything recorded in
branch_islands. Invoked immediately after the last instruction of
the epilogue has been emitted; the branch islands must be appended
to, and contiguous with, the function body. Mach-O stubs are
generated in machopic_output_stub(). */
/* NOTE(review): this chunk lost lines in extraction (the "static void"
   header, braces, the tmp_buf declaration, and the if/else framing of
   the PIC vs non-PIC emission paths are not visible).  The code below
   is kept exactly as found; only comments were added.  */
macho_branch_islands (void)
/* Drain the worklist, emitting one island per recorded far call.  */
while (!vec_safe_is_empty (branch_islands))
branch_island *bi = &branch_islands->last ();
const char *label = IDENTIFIER_POINTER (bi->label_name);
const char *name = IDENTIFIER_POINTER (bi->function_name);
char name_buf[512];
/* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
if (name[0] == '*' || name[0] == '&')
strcpy (name_buf, name+1);
strcpy (name_buf+1, name);
/* Build the island's assembly text in tmp_buf, starting with the
   island label.  (tmp_buf's declaration appears to be elided.)  */
strcpy (tmp_buf, "\n");
strcat (tmp_buf, label);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
/* PIC path: obtain the pc in r11, via the ppc476 link-stack thunk
   when TARGET_LINK_STACK, else with bcl 20,31 to the _pic label.  */
if (TARGET_LINK_STACK)
get_ppc476_thunk_name (name);
strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
strcat (tmp_buf, name);
strcat (tmp_buf, "\n");
strcat (tmp_buf, label);
strcat (tmp_buf, "_pic:\n\tmflr r11\n");
strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
strcat (tmp_buf, label);
strcat (tmp_buf, "_pic\n");
strcat (tmp_buf, label);
strcat (tmp_buf, "_pic:\n\tmflr r11\n");
/* Form the target address relative to the _pic label (ha16/lo16),
   restore lr, and branch via ctr.  */
strcat (tmp_buf, "\taddis r11,r11,ha16(");
strcat (tmp_buf, name_buf);
strcat (tmp_buf, " - ");
strcat (tmp_buf, label);
strcat (tmp_buf, "_pic)\n");
strcat (tmp_buf, "\tmtlr r0\n");
strcat (tmp_buf, "\taddi r12,r11,lo16(");
strcat (tmp_buf, name_buf);
strcat (tmp_buf, " - ");
strcat (tmp_buf, label);
strcat (tmp_buf, "_pic)\n");
strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
/* Non-PIC path: load the absolute target address into r12.  */
strcat (tmp_buf, ":\nlis r12,hi16(");
strcat (tmp_buf, name_buf);
strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
strcat (tmp_buf, name_buf);
strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
output_asm_insn (tmp_buf, 0);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
branch_islands->pop ();
33231 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
33232 already there or not. */
33235 no_previous_def (tree function_name)
33240 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33241 if (function_name == bi->function_name)
33246 /* GET_PREV_LABEL gets the label name from the previous definition of
33250 get_prev_label (tree function_name)
33255 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33256 if (function_name == bi->function_name)
33257 return bi->label_name;
/* Generate PIC and indirect symbol stubs. */
/* NOTE(review): lines were lost in extraction here (the "void" header,
   braces, the MACHOPIC_ATOMIC/PIC if/else framing around the two stub
   flavours, and a label counter increment are not visible).  Code kept
   exactly as found; only comments added.  */
machopic_output_stub (FILE *file, const char *symb, const char *stub)
unsigned int length;
char *symbol_name, *lazy_ptr_name;
char *local_label_0;
static int label = 0;
/* Lose our funky encoding stuff so it doesn't contaminate the stub. */
symb = (*targetm.strip_name_encoding) (symb);
length = strlen (symb);
symbol_name = XALLOCAVEC (char, length + 32);
GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
lazy_ptr_name = XALLOCAVEC (char, length + 32);
GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
/* Pick the stub section; which branch is taken appears to depend on
   an elided PIC condition.  */
switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
fprintf (file, "\t.align 5\n");
fprintf (file, "%s:\n", stub);
fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
sprintf (local_label_0, "\"L%011d$spb\"", label);
/* PIC stub: get the pc in r11, then load the lazy pointer
   pc-relatively and jump through it.  */
fprintf (file, "\tmflr r0\n");
if (TARGET_LINK_STACK)
get_ppc476_thunk_name (name);
fprintf (file, "\tbl %s\n", name);
fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
lazy_ptr_name, local_label_0);
fprintf (file, "\tmtlr r0\n");
fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
(TARGET_64BIT ? "ldu" : "lwzu"),
lazy_ptr_name, local_label_0);
fprintf (file, "\tmtctr r12\n");
fprintf (file, "\tbctr\n");
/* Non-PIC stub: load the lazy pointer through its absolute address.  */
fprintf (file, "\t.align 4\n");
fprintf (file, "%s:\n", stub);
fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
(TARGET_64BIT ? "ldu" : "lwzu"),
fprintf (file, "\tmtctr r12\n");
fprintf (file, "\tbctr\n");
/* Emit the lazy pointer itself, initially pointing at the dyld
   binding helper.  */
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
fprintf (file, "%s:\n", lazy_ptr_name);
fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
fprintf (file, "%sdyld_stub_binding_helper\n",
(TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
/* Legitimize PIC addresses. If the address is already
position-independent, we return ORIG. Newly generated
position-independent addresses go into a reg. This is REG if non
zero, otherwise we allocate register(s) as necessary. */
/* True iff X fits in a signed 16-bit immediate.  */
#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
/* NOTE(review): the return-type line, braces, locals (base, offset,
   reg_temp) and parts of the recursive calls were lost in extraction.
   Code kept exactly as found; only comments added.  */
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
if (reg == NULL && !reload_completed)
reg = gen_reg_rtx (Pmode);
if (GET_CODE (orig) == CONST)
/* (const (plus pic_offset_table_rtx ...)) is already legitimate.  */
if (GET_CODE (XEXP (orig, 0)) == PLUS
&& XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
/* Use a different reg for the intermediate value, as
it will be marked UNCHANGING. */
reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
/* Legitimize both operands of the PLUS recursively.  */
base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
if (CONST_INT_P (offset))
/* A small constant offset can be folded directly ...  */
if (SMALL_INT (offset))
return plus_constant (Pmode, base, INTVAL (offset));
else if (!reload_completed)
offset = force_reg (Pmode, offset);
/* ... otherwise (after reload) spill the constant to memory and
   legitimize the memory reference instead.  */
rtx mem = force_const_mem (Pmode, orig);
return machopic_legitimize_pic_address (mem, Pmode, reg);
return gen_rtx_PLUS (Pmode, base, offset);
/* Fall back on generic machopic code. */
return machopic_legitimize_pic_address (orig, mode, reg);
33396 /* Output a .machine directive for the Darwin assembler, and call
33397 the generic start_file routine. */
33400 rs6000_darwin_file_start (void)
33402 static const struct
33406 HOST_WIDE_INT if_set;
33408 { "ppc64", "ppc64", MASK_64BIT },
33409 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33410 { "power4", "ppc970", 0 },
33411 { "G5", "ppc970", 0 },
33412 { "7450", "ppc7450", 0 },
33413 { "7400", "ppc7400", MASK_ALTIVEC },
33414 { "G4", "ppc7400", 0 },
33415 { "750", "ppc750", 0 },
33416 { "740", "ppc750", 0 },
33417 { "G3", "ppc750", 0 },
33418 { "604e", "ppc604e", 0 },
33419 { "604", "ppc604", 0 },
33420 { "603e", "ppc603", 0 },
33421 { "603", "ppc603", 0 },
33422 { "601", "ppc601", 0 },
33423 { NULL, "ppc", 0 } };
33424 const char *cpu_id = "";
33427 rs6000_file_start ();
33428 darwin_file_start ();
33430 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33432 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33433 cpu_id = rs6000_default_cpu;
33435 if (global_options_set.x_rs6000_cpu_index)
33436 cpu_id = processor_target_table[rs6000_cpu_index].name;
33438 /* Look through the mapping array. Pick the first name that either
33439 matches the argument, has a bit set in IF_SET that is also set
33440 in the target flags, or has a NULL name. */
33443 while (mapping[i].arg != NULL
33444 && strcmp (mapping[i].arg, cpu_id) != 0
33445 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33448 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
33451 #endif /* TARGET_MACHO */
33455 rs6000_elf_reloc_rw_mask (void)
33459 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33465 /* Record an element in the table of global constructors. SYMBOL is
33466 a SYMBOL_REF of the function to be called; PRIORITY is a number
33467 between 0 and MAX_INIT_PRIORITY.
33469 This differs from default_named_section_asm_out_constructor in
33470 that we have special handling for -mrelocatable. */
33472 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33474 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33476 const char *section = ".ctors";
33479 if (priority != DEFAULT_INIT_PRIORITY)
33481 sprintf (buf, ".ctors.%.5u",
33482 /* Invert the numbering so the linker puts us in the proper
33483 order; constructors are run from right to left, and the
33484 linker sorts in increasing order. */
33485 MAX_INIT_PRIORITY - priority);
33489 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33490 assemble_align (POINTER_SIZE);
33492 if (DEFAULT_ABI == ABI_V4
33493 && (TARGET_RELOCATABLE || flag_pic > 1))
33495 fputs ("\t.long (", asm_out_file);
33496 output_addr_const (asm_out_file, symbol);
33497 fputs (")@fixup\n", asm_out_file);
33500 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33503 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33505 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33507 const char *section = ".dtors";
33510 if (priority != DEFAULT_INIT_PRIORITY)
33512 sprintf (buf, ".dtors.%.5u",
33513 /* Invert the numbering so the linker puts us in the proper
33514 order; constructors are run from right to left, and the
33515 linker sorts in increasing order. */
33516 MAX_INIT_PRIORITY - priority);
33520 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33521 assemble_align (POINTER_SIZE);
33523 if (DEFAULT_ABI == ABI_V4
33524 && (TARGET_RELOCATABLE || flag_pic > 1))
33526 fputs ("\t.long (", asm_out_file);
33527 output_addr_const (asm_out_file, symbol);
33528 fputs (")@fixup\n", asm_out_file);
33531 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
/* Emit the assembler preamble that introduces function NAME for DECL:
   the 64-bit (non-ELFv2) OPD function descriptor, the .type/.size
   directives, the -mrelocatable TOC pointer fragment, and (for the
   AIX ABI on ELF) a hand-built descriptor.
   NOTE(review): lines were lost in extraction (the "void" header,
   braces, several else arms, the local buf/uses_toc declarations);
   code kept exactly as found, comments only added.  */
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
/* 64-bit non-ELFv2: emit the function descriptor in .opd.  */
if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
ASM_OUTPUT_LABEL (file, name);
fputs (DOUBLE_INT_ASM_OP, file);
rs6000_output_function_entry (file, name);
fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
fputs ("\t.size\t", file);
assemble_name (file, name);
fputs (",24\n\t.type\t.", file);
assemble_name (file, name);
fputs (",@function\n", file);
if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
fputs ("\t.globl\t.", file);
assemble_name (file, name);
ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
rs6000_output_function_entry (file, name);
fputs (":\n", file);
/* -mrelocatable V4 without secure PLT: emit the LCL/LCF pair used to
   locate the TOC, when the function actually uses the TOC.  */
if (DEFAULT_ABI == ABI_V4
&& (TARGET_RELOCATABLE || flag_pic > 1)
&& !TARGET_SECURE_PLT
&& (!constant_pool_empty_p () || crtl->profile)
&& (uses_toc = uses_TOC ()))
switch_to_other_text_partition ();
(*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
fprintf (file, "\t.long ");
assemble_name (file, toc_label_name);
ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
assemble_name (file, buf);
switch_to_other_text_partition ();
ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
/* Large code model: emit the .TOC.-LCF quad used by the global entry
   point sequence.  */
if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
(*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
fprintf (file, "\t.quad .TOC.-");
ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
assemble_name (file, buf);
/* AIX ABI on ELF: build a descriptor (entry, GOT, 0) by hand.  */
if (DEFAULT_ABI == ABI_AIX)
const char *desc_name, *orig_name;
orig_name = (*targetm.strip_name_encoding) (name);
desc_name = orig_name;
while (*desc_name == '.')
if (TREE_PUBLIC (decl))
fprintf (file, "\t.globl %s\n", desc_name);
fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
fprintf (file, "%s:\n", desc_name);
fprintf (file, "\t.long %s\n", orig_name);
fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
fputs ("\t.long 0\n", file);
fprintf (file, "\t.previous\n");
ASM_OUTPUT_LABEL (file, name);
33627 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33629 rs6000_elf_file_end (void)
33631 #ifdef HAVE_AS_GNU_ATTRIBUTE
33632 /* ??? The value emitted depends on options active at file end.
33633 Assume anyone using #pragma or attributes that might change
33634 options knows what they are doing. */
33635 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33636 && rs6000_passes_float)
33640 if (TARGET_HARD_FLOAT)
33644 if (rs6000_passes_long_double)
33646 if (!TARGET_LONG_DOUBLE_128)
33648 else if (TARGET_IEEEQUAD)
33653 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33655 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33657 if (rs6000_passes_vector)
33658 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33659 (TARGET_ALTIVEC_ABI ? 2 : 1));
33660 if (rs6000_returns_struct)
33661 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33662 aix_struct_return ? 2 : 1);
33665 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33666 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33667 file_end_indicate_exec_stack ();
33670 if (flag_split_stack)
33671 file_end_indicate_split_stack ();
33675 /* We have expanded a CPU builtin, so we need to emit a reference to
33676 the special symbol that LIBC uses to declare it supports the
33677 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33678 switch_to_section (data_section);
33679 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33680 fprintf (asm_out_file, "\t%s %s\n",
33681 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33688 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33689 #define HAVE_XCOFF_DWARF_EXTRAS 0
33692 static enum unwind_info_type
33693 rs6000_xcoff_debug_unwind_info (void)
33699 rs6000_xcoff_asm_output_anchor (rtx symbol)
33703 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33704 SYMBOL_REF_BLOCK_OFFSET (symbol));
33705 fprintf (asm_out_file, "%s", SET_ASM_OP);
33706 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33707 fprintf (asm_out_file, ",");
33708 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33709 fprintf (asm_out_file, "\n");
33713 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33715 fputs (GLOBAL_ASM_OP, stream);
33716 RS6000_OUTPUT_BASENAME (stream, name);
33717 putc ('\n', stream);
33720 /* A get_unnamed_decl callback, used for read-only sections. PTR
33721 points to the section string variable. */
33724 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33726 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33727 *(const char *const *) directive,
33728 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33731 /* Likewise for read-write sections. */
33734 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33736 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33737 *(const char *const *) directive,
33738 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33742 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33744 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33745 *(const char *const *) directive,
33746 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33749 /* A get_unnamed_section callback, used for switching to toc_section. */
33752 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33754 if (TARGET_MINIMAL_TOC)
33756 /* toc_section is always selected at least once from
33757 rs6000_xcoff_file_start, so this is guaranteed to
33758 always be defined once and only once in each file. */
33759 if (!toc_initialized)
33761 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33762 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33763 toc_initialized = 1;
33765 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33766 (TARGET_32BIT ? "" : ",3"));
33769 fputs ("\t.toc\n", asm_out_file);
33772 /* Implement TARGET_ASM_INIT_SECTIONS. */
33775 rs6000_xcoff_asm_init_sections (void)
33777 read_only_data_section
33778 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33779 &xcoff_read_only_section_name);
33781 private_data_section
33782 = get_unnamed_section (SECTION_WRITE,
33783 rs6000_xcoff_output_readwrite_section_asm_op,
33784 &xcoff_private_data_section_name);
33787 = get_unnamed_section (SECTION_TLS,
33788 rs6000_xcoff_output_tls_section_asm_op,
33789 &xcoff_tls_data_section_name);
33791 tls_private_data_section
33792 = get_unnamed_section (SECTION_TLS,
33793 rs6000_xcoff_output_tls_section_asm_op,
33794 &xcoff_private_data_section_name);
33796 read_only_private_data_section
33797 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33798 &xcoff_private_data_section_name);
33801 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33803 readonly_data_section = read_only_data_section;
33807 rs6000_xcoff_reloc_rw_mask (void)
33813 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33814 tree decl ATTRIBUTE_UNUSED)
33817 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33819 if (flags & SECTION_EXCLUDE)
33821 else if (flags & SECTION_DEBUG)
33823 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33826 else if (flags & SECTION_CODE)
33828 else if (flags & SECTION_TLS)
33830 else if (flags & SECTION_WRITE)
33835 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33836 (flags & SECTION_CODE) ? "." : "",
33837 name, suffix[smclass], flags & SECTION_ENTSIZE);
/* True iff DECL is a function or variable carrying an explicit
   section name.  */
#define IN_NAMED_SECTION(DECL) \
((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
&& DECL_SECTION_NAME (DECL) != NULL)
/* Implement TARGET_ASM_SELECT_SECTION for XCOFF.
   NOTE(review): the return-type line, braces, and several `return`
   statements/else arms were lost in extraction; code kept exactly as
   found, comments only added.  */
rs6000_xcoff_select_section (tree decl, int reloc,
unsigned HOST_WIDE_INT align)
/* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
   a unique section of their own.  */
if (align > BIGGEST_ALIGNMENT)
resolve_unique_section (decl, reloc, true);
if (IN_NAMED_SECTION (decl))
return get_named_section (decl, NULL, reloc);
/* Read-only data: public goes to the shared RO section, private to
   the per-file RO section.  */
if (decl_readonly_section (decl, reloc))
if (TREE_PUBLIC (decl))
return read_only_data_section;
return read_only_private_data_section;
/* Thread-local variables.  */
if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
if (TREE_PUBLIC (decl))
return tls_data_section;
else if (bss_initializer_p (decl))
/* Convert to COMMON to emit in BSS. */
DECL_COMMON (decl) = 1;
return tls_comm_section;
return tls_private_data_section;
/* Writable data.  */
if (TREE_PUBLIC (decl))
return data_section;
return private_data_section;
33890 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33894 /* Use select_section for private data and uninitialized data with
33895 alignment <= BIGGEST_ALIGNMENT. */
33896 if (!TREE_PUBLIC (decl)
33897 || DECL_COMMON (decl)
33898 || (DECL_INITIAL (decl) == NULL_TREE
33899 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33900 || DECL_INITIAL (decl) == error_mark_node
33901 || (flag_zero_initialized_in_bss
33902 && initializer_zerop (DECL_INITIAL (decl))))
33905 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33906 name = (*targetm.strip_name_encoding) (name);
33907 set_decl_section_name (decl, name);
33910 /* Select section for constant in constant pool.
33912 On RS/6000, all constants are in the private read-only data area.
33913 However, if this is being placed in the TOC it must be output as a
33917 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33918 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33920 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33921 return toc_section;
33923 return read_only_private_data_section;
33926 /* Remove any trailing [DS] or the like from the symbol name. */
33928 static const char *
33929 rs6000_xcoff_strip_name_encoding (const char *name)
33934 len = strlen (name);
33935 if (name[len - 1] == ']')
33936 return ggc_alloc_string (name, len - 4);
33941 /* Section attributes. AIX is always PIC. */
33943 static unsigned int
33944 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33946 unsigned int align;
33947 unsigned int flags = default_section_type_flags (decl, name, reloc);
33949 /* Align to at least UNIT size. */
33950 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33951 align = MIN_UNITS_PER_WORD;
33953 /* Increase alignment of large objects if not already stricter. */
33954 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33955 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33956 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33958 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33961 /* Output at beginning of assembler file.
33963 Initialize the section names for the RS/6000 at this point.
33965 Specify filename, including full path, to assembler.
33967 We want to go into the TOC section so at least one .toc will be emitted.
33968 Also, in order to output proper .bs/.es pairs, we need at least one static
33969 [RW] section emitted.
33971 Finally, declare mcount when profiling to make the assembler happy. */
33974 rs6000_xcoff_file_start (void)
33976 rs6000_gen_section_name (&xcoff_bss_section_name,
33977 main_input_filename, ".bss_");
33978 rs6000_gen_section_name (&xcoff_private_data_section_name,
33979 main_input_filename, ".rw_");
33980 rs6000_gen_section_name (&xcoff_read_only_section_name,
33981 main_input_filename, ".ro_");
33982 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33983 main_input_filename, ".tls_");
33984 rs6000_gen_section_name (&xcoff_tbss_section_name,
33985 main_input_filename, ".tbss_[UL]");
33987 fputs ("\t.file\t", asm_out_file);
33988 output_quoted_string (asm_out_file, main_input_filename);
33989 fputc ('\n', asm_out_file);
33990 if (write_symbols != NO_DEBUG)
33991 switch_to_section (private_data_section);
33992 switch_to_section (toc_section);
33993 switch_to_section (text_section);
33995 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33996 rs6000_file_start ();
33999 /* Output at end of assembler file.
34000 On the RS/6000, referencing data should automatically pull in text. */
34003 rs6000_xcoff_file_end (void)
34005 switch_to_section (text_section);
34006 fputs ("_section_.text:\n", asm_out_file);
34007 switch_to_section (data_section);
34008 fputs (TARGET_32BIT
34009 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
/* Arguments threaded through call_for_symbol_and_aliases into
   rs6000_declare_alias.  */
struct declare_alias_data
{
  FILE *file;			/* Output stream.  */
  bool function_descriptor;	/* True when emitting the '.'-prefixed
				   code-entry symbol rather than the
				   descriptor symbol.  */
};
/* Declare alias N. A helper function for for_node_and_aliases. */
/* NOTE(review): lines were lost in extraction (the return type, braces,
   `return` statements, the dollar_inside counting loop body, several
   else arms, and the closing #endif of the ASM_WEAKEN_DECL block are
   not visible).  Code kept exactly as found; only comments added.  */
rs6000_declare_alias (struct symtab_node *n, void *d)
struct declare_alias_data *data = (struct declare_alias_data *)d;
/* Main symbol is output specially, because varasm machinery does part of
the job for us - we do not need to declare .globl/lglobs and such. */
if (!n->alias || n->weakref)
if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
/* Prevent assemble_alias from trying to use .set pseudo operation
that does not behave as expected by the middle-end. */
TREE_ASM_WRITTEN (n->decl) = true;
const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
char *buffer = (char *) alloca (strlen (name) + 2);
int dollar_inside = 0;
/* Count '$' characters; names containing them need .rename so the
   quoted original name survives AIX assembler mangling.  */
strcpy (buffer, name);
p = strchr (buffer, '$');
p = strchr (p + 1, '$');
/* Public aliases: .rename when needed, then .globl for the descriptor
   and/or the '.'-prefixed entry symbol.  */
if (TREE_PUBLIC (n->decl))
if (!RS6000_WEAK || !DECL_WEAK (n->decl))
if (dollar_inside) {
if (data->function_descriptor)
fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
if (data->function_descriptor)
fputs ("\t.globl .", data->file);
RS6000_OUTPUT_BASENAME (data->file, buffer);
putc ('\n', data->file);
fputs ("\t.globl ", data->file);
RS6000_OUTPUT_BASENAME (data->file, buffer);
putc ('\n', data->file);
#ifdef ASM_WEAKEN_DECL
else if (DECL_WEAK (n->decl) && !data->function_descriptor)
ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
/* Local aliases: same dance with .lglobl instead of .globl.  */
if (data->function_descriptor)
fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
if (data->function_descriptor)
fputs ("\t.lglobl .", data->file);
RS6000_OUTPUT_BASENAME (data->file, buffer);
putc ('\n', data->file);
fputs ("\t.lglobl ", data->file);
RS6000_OUTPUT_BASENAME (data->file, buffer);
putc ('\n', data->file);
/* Finally emit the alias as an alternative label ("name:") in front
   of the definition, with the '.' prefix for entry symbols.  */
if (data->function_descriptor)
fputs (".", data->file);
RS6000_OUTPUT_BASENAME (data->file, buffer);
fputs (":\n", data->file);
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  /* Indexed by enum symbol_visibility (default, protected, hidden,
     internal); the string is appended verbatim to the directive.  */
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
/* This macro produces the initial definition of a function name.
On the RS/6000, we need to place an extra '.' in the function name and
output the function descriptor.
Dollar signs are converted to underscores.
The csect for the function will have already been created when
text_section was selected. We do have to go back to that csect, however.
The third and fourth parameters to the .function pseudo-op (16 and 044)
are placeholders which no longer have any use.
Because AIX assembler's .set command has unexpected semantics, we output
all aliases as alternative labels in front of the definition. */
/* NOTE(review): lines were lost in extraction (the "void" header,
   braces, the dollar-counting loop body, else arms, the #endif for
   HAVE_GAS_HIDDEN, and the second call_for_symbol_and_aliases argument
   lines are not visible).  Code kept exactly as found; comments only.  */
rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
char *buffer = (char *) alloca (strlen (name) + 1);
int dollar_inside = 0;
struct declare_alias_data data = {file, false};
/* Count '$' characters; such names need .rename directives.  */
strcpy (buffer, name);
p = strchr (buffer, '$');
p = strchr (p + 1, '$');
/* Public, non-weak symbols: .globl the '.'-prefixed entry point.  */
if (TREE_PUBLIC (decl))
if (!RS6000_WEAK || !DECL_WEAK (decl))
if (dollar_inside) {
fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
fputs ("\t.globl .", file);
RS6000_OUTPUT_BASENAME (file, buffer);
#ifdef HAVE_GAS_HIDDEN
fputs (rs6000_xcoff_visibility (decl), file);
/* Local symbols: .lglobl the entry point instead.  */
if (dollar_inside) {
fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
fputs ("\t.lglobl .", file);
RS6000_OUTPUT_BASENAME (file, buffer);
/* Emit the function descriptor csect ([DS]): descriptor label,
   aliases, then the pointer to the '.' entry point and TOC anchor.  */
fputs ("\t.csect ", file);
RS6000_OUTPUT_BASENAME (file, buffer);
fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
RS6000_OUTPUT_BASENAME (file, buffer);
fputs (":\n", file);
symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
RS6000_OUTPUT_BASENAME (file, buffer);
fputs (", TOC[tc0], 0\n", file);
/* Switch to the code csect and emit the '.' entry label + aliases.  */
switch_to_section (function_section (decl));
RS6000_OUTPUT_BASENAME (file, buffer);
fputs (":\n", file);
data.function_descriptor = true;
symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
/* Debug bookkeeping: xcoffout for stabs, .function for DWARF.  */
if (!DECL_IGNORED_P (decl))
if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
xcoffout_declare_function (file, decl, buffer);
else if (write_symbols == DWARF2_DEBUG)
name = (*targetm.strip_name_encoding) (name);
fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34203 /* Output assembly language to globalize a symbol from a DECL,
34204 possibly with visibility. */
34207 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
34209 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
34210 fputs (GLOBAL_ASM_OP, stream);
34211 RS6000_OUTPUT_BASENAME (stream, name);
34212 #ifdef HAVE_GAS_HIDDEN
34213 fputs (rs6000_xcoff_visibility (decl), stream);
34215 putc ('\n', stream);
34218 /* Output assembly language to define a symbol as COMMON from a DECL,
34219 possibly with visibility. */
34222 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34223 tree decl ATTRIBUTE_UNUSED,
34225 unsigned HOST_WIDE_INT size,
34226 unsigned HOST_WIDE_INT align)
34228 unsigned HOST_WIDE_INT align2 = 2;
34231 align2 = floor_log2 (align / BITS_PER_UNIT);
34235 fputs (COMMON_ASM_OP, stream);
34236 RS6000_OUTPUT_BASENAME (stream, name);
34239 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34242 #ifdef HAVE_GAS_HIDDEN
34244 fputs (rs6000_xcoff_visibility (decl), stream);
34246 putc ('\n', stream);
34249 /* This macro produces the initial definition of a object (variable) name.
34250 Because AIX assembler's .set command has unexpected semantics, we output
34251 all aliases as alternative labels in front of the definition. */
34254 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34256 struct declare_alias_data data = {file, false};
34257 RS6000_OUTPUT_BASENAME (file, name);
34258 fputs (":\n", file);
34259 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34263 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
34266 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34268 fputs (integer_asm_op (size, FALSE), file);
34269 assemble_name (file, label);
34270 fputs ("-$", file);
34273 /* Output a symbol offset relative to the dbase for the current object.
34274 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34277 __gcc_unwind_dbase is embedded in all executables/libraries through
34278 libgcc/config/rs6000/crtdbase.S. */
34281 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34283 fputs (integer_asm_op (size, FALSE), file);
34284 assemble_name (file, label);
34285 fputs("-__gcc_unwind_dbase", file);
34290 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34294 const char *symname;
34296 default_encode_section_info (decl, rtl, first);
34298 /* Careful not to prod global register variables. */
34301 symbol = XEXP (rtl, 0);
34302 if (!SYMBOL_REF_P (symbol))
34305 flags = SYMBOL_REF_FLAGS (symbol);
34307 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34308 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34310 SYMBOL_REF_FLAGS (symbol) = flags;
34312 /* Append mapping class to extern decls. */
34313 symname = XSTR (symbol, 0);
34314 if (decl /* sync condition with assemble_external () */
34315 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34316 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34317 || TREE_CODE (decl) == FUNCTION_DECL)
34318 && symname[strlen (symname) - 1] != ']')
34320 char *newname = (char *) alloca (strlen (symname) + 5);
34321 strcpy (newname, symname);
34322 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34323 ? "[DS]" : "[UA]"));
34324 XSTR (symbol, 0) = ggc_strdup (newname);
34327 #endif /* HAVE_AS_TLS */
34328 #endif /* TARGET_XCOFF */
34331 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34332 const char *name, const char *val)
34334 fputs ("\t.weak\t", stream);
34335 RS6000_OUTPUT_BASENAME (stream, name);
34336 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34337 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34340 fputs ("[DS]", stream);
34341 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34343 fputs (rs6000_xcoff_visibility (decl), stream);
34345 fputs ("\n\t.weak\t.", stream);
34346 RS6000_OUTPUT_BASENAME (stream, name);
34348 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34350 fputs (rs6000_xcoff_visibility (decl), stream);
34352 fputc ('\n', stream);
34355 #ifdef ASM_OUTPUT_DEF
34356 ASM_OUTPUT_DEF (stream, name, val);
34358 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34359 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34361 fputs ("\t.set\t.", stream);
34362 RS6000_OUTPUT_BASENAME (stream, name);
34363 fputs (",.", stream);
34364 RS6000_OUTPUT_BASENAME (stream, val);
34365 fputc ('\n', stream);
34371 /* Return true if INSN should not be copied. */
34374 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34376 return recog_memoized (insn) >= 0
34377 && get_attr_cannot_copy (insn);
/* NOTE(review): this region was damaged during extraction -- interior
   lines (case labels, braces, `else` arms) are missing and stray source
   line numbers are embedded at the start of each line.  The code is left
   byte-identical; only comments are added.  Restore from a pristine
   copy of rs6000.c before compiling.  */
34380 /* Compute a (partial) cost for rtx X. Return true if the complete
34381 cost has been computed, and false if subexpressions should be
34382 scanned. In either case, *TOTAL contains the cost result. */
34385 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34386 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34388 int code = GET_CODE (x);
/* CONST_INT handling: a constant that is valid directly in the
   surrounding insn costs nothing.  */
34392 /* On the RS/6000, if it is valid in the insn, it is free. */
34394 if (((outer_code == SET
34395 || outer_code == PLUS
34396 || outer_code == MINUS)
34397 && (satisfies_constraint_I (x)
34398 || satisfies_constraint_L (x)))
34399 || (outer_code == AND
34400 && (satisfies_constraint_K (x)
34402 ? satisfies_constraint_L (x)
34403 : satisfies_constraint_J (x))))
34404 || ((outer_code == IOR || outer_code == XOR)
34405 && (satisfies_constraint_K (x)
34407 ? satisfies_constraint_L (x)
34408 : satisfies_constraint_J (x))))
34409 || outer_code == ASHIFT
34410 || outer_code == ASHIFTRT
34411 || outer_code == LSHIFTRT
34412 || outer_code == ROTATE
34413 || outer_code == ROTATERT
34414 || outer_code == ZERO_EXTRACT
34415 || (outer_code == MULT
34416 && satisfies_constraint_I (x))
34417 || ((outer_code == DIV || outer_code == UDIV
34418 || outer_code == MOD || outer_code == UMOD)
34419 && exact_log2 (INTVAL (x)) >= 0)
34420 || (outer_code == COMPARE
34421 && (satisfies_constraint_I (x)
34422 || satisfies_constraint_K (x)))
34423 || ((outer_code == EQ || outer_code == NE)
34424 && (satisfies_constraint_I (x)
34425 || satisfies_constraint_K (x)
34427 ? satisfies_constraint_L (x)
34428 : satisfies_constraint_J (x))))
34429 || (outer_code == GTU
34430 && satisfies_constraint_I (x))
34431 || (outer_code == LTU
34432 && satisfies_constraint_P (x)))
/* Constants loadable with one instruction cost one insn.  */
34437 else if ((outer_code == PLUS
34438 && reg_or_add_cint_operand (x, VOIDmode))
34439 || (outer_code == MINUS
34440 && reg_or_sub_cint_operand (x, VOIDmode))
34441 || ((outer_code == SET
34442 || outer_code == IOR
34443 || outer_code == XOR)
34445 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34447 *total = COSTS_N_INSNS (1);
34453 case CONST_WIDE_INT:
34457 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34461 /* When optimizing for size, MEM should be slightly more expensive
34462 than generating address, e.g., (plus (reg) (const)).
34463 L1 cache latency is about two instructions. */
34464 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
/* Penalize unaligned accesses heavily when the target traps or
   emulates them slowly.  */
34465 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
34466 *total += COSTS_N_INSNS (100);
34475 if (FLOAT_MODE_P (mode))
34476 *total = rs6000_cost->fp;
34478 *total = COSTS_N_INSNS (1);
/* MULT: constant multipliers in range cost less on some cores.  */
34482 if (CONST_INT_P (XEXP (x, 1))
34483 && satisfies_constraint_I (XEXP (x, 1)))
34485 if (INTVAL (XEXP (x, 1)) >= -256
34486 && INTVAL (XEXP (x, 1)) <= 255)
34487 *total = rs6000_cost->mulsi_const9;
34489 *total = rs6000_cost->mulsi_const;
34491 else if (mode == SFmode)
34492 *total = rs6000_cost->fp;
34493 else if (FLOAT_MODE_P (mode))
34494 *total = rs6000_cost->dmul;
34495 else if (mode == DImode)
34496 *total = rs6000_cost->muldi;
34498 *total = rs6000_cost->mulsi;
34502 if (mode == SFmode)
34503 *total = rs6000_cost->fp;
34505 *total = rs6000_cost->dmul;
/* DIV/MOD: floating-point division cost by mode; integer division
   by a power of two is a cheap shift sequence.  */
34510 if (FLOAT_MODE_P (mode))
34512 *total = mode == DFmode ? rs6000_cost->ddiv
34513 : rs6000_cost->sdiv;
34520 if (CONST_INT_P (XEXP (x, 1))
34521 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34523 if (code == DIV || code == MOD)
34525 *total = COSTS_N_INSNS (2);
34528 *total = COSTS_N_INSNS (1);
34532 if (GET_MODE (XEXP (x, 1)) == DImode)
34533 *total = rs6000_cost->divdi;
34535 *total = rs6000_cost->divsi;
34537 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34538 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34539 *total += COSTS_N_INSNS (2);
34543 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34547 *total = COSTS_N_INSNS (4);
34551 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34555 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34559 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34562 *total = COSTS_N_INSNS (1);
/* AND: rotate-and-mask and immediate forms are single insns.  */
34566 if (CONST_INT_P (XEXP (x, 1)))
34568 rtx left = XEXP (x, 0);
34569 rtx_code left_code = GET_CODE (left);
34571 /* rotate-and-mask: 1 insn. */
34572 if ((left_code == ROTATE
34573 || left_code == ASHIFT
34574 || left_code == LSHIFTRT)
34575 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34577 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34578 if (!CONST_INT_P (XEXP (left, 1)))
34579 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34580 *total += COSTS_N_INSNS (1);
34584 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34585 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34586 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34587 || (val & 0xffff) == val
34588 || (val & 0xffff0000) == val
34589 || ((val & 0xffff) == 0 && mode == SImode))
34591 *total = rtx_cost (left, mode, AND, 0, speed);
34592 *total += COSTS_N_INSNS (1);
34597 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34599 *total = rtx_cost (left, mode, AND, 0, speed);
34600 *total += COSTS_N_INSNS (2);
34605 *total = COSTS_N_INSNS (1);
34610 *total = COSTS_N_INSNS (1);
34616 *total = COSTS_N_INSNS (1);
34620 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34621 the sign extend and shift separately within the insn. */
34622 if (TARGET_EXTSWSLI && mode == DImode
34623 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34624 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34635 /* Handle mul_highpart. */
34636 if (outer_code == TRUNCATE
34637 && GET_CODE (XEXP (x, 0)) == MULT)
34639 if (mode == DImode)
34640 *total = rs6000_cost->muldi;
34642 *total = rs6000_cost->mulsi;
34645 else if (outer_code == AND)
34648 *total = COSTS_N_INSNS (1);
34653 if (MEM_P (XEXP (x, 0)))
34656 *total = COSTS_N_INSNS (1);
34662 if (!FLOAT_MODE_P (mode))
34664 *total = COSTS_N_INSNS (1);
34670 case UNSIGNED_FLOAT:
34673 case FLOAT_TRUNCATE:
34674 *total = rs6000_cost->fp;
34678 if (mode == DFmode)
34679 *total = rs6000_cost->sfdf_convert;
34681 *total = rs6000_cost->fp;
/* UNSPEC: presumably dispatching on the unspec number -- the case
   labels are among the missing lines; confirm against a full copy.  */
34685 switch (XINT (x, 1))
34688 *total = rs6000_cost->fp;
34700 *total = COSTS_N_INSNS (1);
34703 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34705 *total = rs6000_cost->fp;
34714 /* Carry bit requires mode == Pmode.
34715 NEG or PLUS already counted so only add one. */
34717 && (outer_code == NEG || outer_code == PLUS))
34719 *total = COSTS_N_INSNS (1);
34727 if (outer_code == SET)
34729 if (XEXP (x, 1) == const0_rtx)
34731 *total = COSTS_N_INSNS (2);
34736 *total = COSTS_N_INSNS (3);
34741 if (outer_code == COMPARE)
34755 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
34758 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34759 int opno, int *total, bool speed)
34761 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34764 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34765 "opno = %d, total = %d, speed = %s, x:\n",
34766 ret ? "complete" : "scan inner",
34767 GET_MODE_NAME (mode),
34768 GET_RTX_NAME (outer_code),
34771 speed ? "true" : "false");
34779 rs6000_insn_cost (rtx_insn *insn, bool speed)
34781 if (recog_memoized (insn) < 0)
34785 return get_attr_length (insn);
34787 int cost = get_attr_cost (insn);
34791 int n = get_attr_length (insn) / 4;
34792 enum attr_type type = get_attr_type (insn);
34799 cost = COSTS_N_INSNS (n + 1);
34803 switch (get_attr_size (insn))
34806 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34809 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34812 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34815 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
34818 gcc_unreachable ();
34822 switch (get_attr_size (insn))
34825 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
34828 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
34831 gcc_unreachable ();
34836 cost = n * rs6000_cost->fp;
34839 cost = n * rs6000_cost->dmul;
34842 cost = n * rs6000_cost->sdiv;
34845 cost = n * rs6000_cost->ddiv;
34852 cost = COSTS_N_INSNS (n + 2);
34856 cost = COSTS_N_INSNS (n);
34862 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34865 rs6000_debug_address_cost (rtx x, machine_mode mode,
34866 addr_space_t as, bool speed)
34868 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34870 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34871 ret, speed ? "true" : "false");
34878 /* A C expression returning the cost of moving data from a register of class
34879 CLASS1 to one of CLASS2. */
34882 rs6000_register_move_cost (machine_mode mode,
34883 reg_class_t from, reg_class_t to)
34887 if (TARGET_DEBUG_COST)
34890 /* Moves from/to GENERAL_REGS. */
34891 if (reg_classes_intersect_p (to, GENERAL_REGS)
34892 || reg_classes_intersect_p (from, GENERAL_REGS))
34894 reg_class_t rclass = from;
34896 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34899 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34900 ret = (rs6000_memory_move_cost (mode, rclass, false)
34901 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34903 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34905 else if (rclass == CR_REGS)
34908 /* For those processors that have slow LR/CTR moves, make them more
34909 expensive than memory in order to bias spills to memory .*/
34910 else if ((rs6000_tune == PROCESSOR_POWER6
34911 || rs6000_tune == PROCESSOR_POWER7
34912 || rs6000_tune == PROCESSOR_POWER8
34913 || rs6000_tune == PROCESSOR_POWER9)
34914 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34915 ret = 6 * hard_regno_nregs (0, mode);
34918 /* A move will cost one instruction per GPR moved. */
34919 ret = 2 * hard_regno_nregs (0, mode);
34922 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34923 else if (VECTOR_MEM_VSX_P (mode)
34924 && reg_classes_intersect_p (to, VSX_REGS)
34925 && reg_classes_intersect_p (from, VSX_REGS))
34926 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
34928 /* Moving between two similar registers is just one instruction. */
34929 else if (reg_classes_intersect_p (to, from))
34930 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34932 /* Everything else has to go through GENERAL_REGS. */
34934 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34935 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34937 if (TARGET_DEBUG_COST)
34939 if (dbg_cost_ctrl == 1)
34941 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34942 ret, GET_MODE_NAME (mode), reg_class_names[from],
34943 reg_class_names[to]);
34950 /* A C expressions returning the cost of moving data of MODE from a register to
34954 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34955 bool in ATTRIBUTE_UNUSED)
34959 if (TARGET_DEBUG_COST)
34962 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34963 ret = 4 * hard_regno_nregs (0, mode);
34964 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34965 || reg_classes_intersect_p (rclass, VSX_REGS)))
34966 ret = 4 * hard_regno_nregs (32, mode);
34967 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34968 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
34970 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34972 if (TARGET_DEBUG_COST)
34974 if (dbg_cost_ctrl == 1)
34976 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34977 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34984 /* Returns a code for a target-specific builtin that implements
34985 reciprocal of the function, or NULL_TREE if not available. */
34988 rs6000_builtin_reciprocal (tree fndecl)
34990 switch (DECL_FUNCTION_CODE (fndecl))
34992 case VSX_BUILTIN_XVSQRTDP:
34993 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34996 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34998 case VSX_BUILTIN_XVSQRTSP:
34999 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
35002 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
35009 /* Load up a constant. If the mode is a vector mode, splat the value across
35010 all of the vector elements. */
35013 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
35017 if (mode == SFmode || mode == DFmode)
35019 rtx d = const_double_from_real_value (dconst, mode);
35020 reg = force_reg (mode, d);
35022 else if (mode == V4SFmode)
35024 rtx d = const_double_from_real_value (dconst, SFmode);
35025 rtvec v = gen_rtvec (4, d, d, d, d);
35026 reg = gen_reg_rtx (mode);
35027 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35029 else if (mode == V2DFmode)
35031 rtx d = const_double_from_real_value (dconst, DFmode);
35032 rtvec v = gen_rtvec (2, d, d);
35033 reg = gen_reg_rtx (mode);
35034 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35037 gcc_unreachable ();
35042 /* Generate an FMA instruction. */
35045 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
35047 machine_mode mode = GET_MODE (target);
35050 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
35051 gcc_assert (dst != NULL);
35054 emit_move_insn (target, dst);
35057 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
35060 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
35062 machine_mode mode = GET_MODE (dst);
35065 /* This is a tad more complicated, since the fnma_optab is for
35066 a different expression: fma(-m1, m2, a), which is the same
35067 thing except in the case of signed zeros.
35069 Fortunately we know that if FMA is supported that FNMSUB is
35070 also supported in the ISA. Just expand it directly. */
35072 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
35074 r = gen_rtx_NEG (mode, a);
35075 r = gen_rtx_FMA (mode, m1, m2, r);
35076 r = gen_rtx_NEG (mode, r);
35077 emit_insn (gen_rtx_SET (dst, r));
35080 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
35081 add a reg_note saying that this was a division. Support both scalar and
35082 vector divide. Assumes no trapping math and finite arguments. */
35085 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
35087 machine_mode mode = GET_MODE (dst);
35088 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
35091 /* Low precision estimates guarantee 5 bits of accuracy. High
35092 precision estimates guarantee 14 bits of accuracy. SFmode
35093 requires 23 bits of accuracy. DFmode requires 52 bits of
35094 accuracy. Each pass at least doubles the accuracy, leading
35095 to the following. */
35096 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35097 if (mode == DFmode || mode == V2DFmode)
35100 enum insn_code code = optab_handler (smul_optab, mode);
35101 insn_gen_fn gen_mul = GEN_FCN (code);
35103 gcc_assert (code != CODE_FOR_nothing);
35105 one = rs6000_load_constant_and_splat (mode, dconst1);
35107 /* x0 = 1./d estimate */
35108 x0 = gen_reg_rtx (mode);
35109 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35112 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35115 /* e0 = 1. - d * x0 */
35116 e0 = gen_reg_rtx (mode);
35117 rs6000_emit_nmsub (e0, d, x0, one);
35119 /* x1 = x0 + e0 * x0 */
35120 x1 = gen_reg_rtx (mode);
35121 rs6000_emit_madd (x1, e0, x0, x0);
35123 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35124 ++i, xprev = xnext, eprev = enext) {
35126 /* enext = eprev * eprev */
35127 enext = gen_reg_rtx (mode);
35128 emit_insn (gen_mul (enext, eprev, eprev));
35130 /* xnext = xprev + enext * xprev */
35131 xnext = gen_reg_rtx (mode);
35132 rs6000_emit_madd (xnext, enext, xprev, xprev);
35138 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35140 /* u = n * xprev */
35141 u = gen_reg_rtx (mode);
35142 emit_insn (gen_mul (u, n, xprev));
35144 /* v = n - (d * u) */
35145 v = gen_reg_rtx (mode);
35146 rs6000_emit_nmsub (v, d, u, n);
35148 /* dst = (v * xprev) + u */
35149 rs6000_emit_madd (dst, v, xprev, u);
35152 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
35155 /* Goldschmidt's Algorithm for single/double-precision floating point
35156 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35159 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35161 machine_mode mode = GET_MODE (src);
35162 rtx e = gen_reg_rtx (mode);
35163 rtx g = gen_reg_rtx (mode);
35164 rtx h = gen_reg_rtx (mode);
35166 /* Low precision estimates guarantee 5 bits of accuracy. High
35167 precision estimates guarantee 14 bits of accuracy. SFmode
35168 requires 23 bits of accuracy. DFmode requires 52 bits of
35169 accuracy. Each pass at least doubles the accuracy, leading
35170 to the following. */
35171 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35172 if (mode == DFmode || mode == V2DFmode)
35177 enum insn_code code = optab_handler (smul_optab, mode);
35178 insn_gen_fn gen_mul = GEN_FCN (code);
35180 gcc_assert (code != CODE_FOR_nothing);
35182 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35184 /* e = rsqrt estimate */
35185 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35188 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35191 rtx zero = force_reg (mode, CONST0_RTX (mode));
35193 if (mode == SFmode)
35195 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35198 emit_move_insn (e, target);
35202 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35203 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35207 /* g = sqrt estimate. */
35208 emit_insn (gen_mul (g, e, src));
35209 /* h = 1/(2*sqrt) estimate. */
35210 emit_insn (gen_mul (h, e, mhalf));
35216 rtx t = gen_reg_rtx (mode);
35217 rs6000_emit_nmsub (t, g, h, mhalf);
35218 /* Apply correction directly to 1/rsqrt estimate. */
35219 rs6000_emit_madd (dst, e, t, e);
35223 for (i = 0; i < passes; i++)
35225 rtx t1 = gen_reg_rtx (mode);
35226 rtx g1 = gen_reg_rtx (mode);
35227 rtx h1 = gen_reg_rtx (mode);
35229 rs6000_emit_nmsub (t1, g, h, mhalf);
35230 rs6000_emit_madd (g1, g, t1, g);
35231 rs6000_emit_madd (h1, h, t1, h);
35236 /* Multiply by 2 for 1/rsqrt. */
35237 emit_insn (gen_add3_insn (dst, h, h));
35242 rtx t = gen_reg_rtx (mode);
35243 rs6000_emit_nmsub (t, g, h, mhalf);
35244 rs6000_emit_madd (dst, g, t, g);
35250 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35251 (Power7) targets. DST is the target, and SRC is the argument operand. */
35254 rs6000_emit_popcount (rtx dst, rtx src)
35256 machine_mode mode = GET_MODE (dst);
35259 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35260 if (TARGET_POPCNTD)
35262 if (mode == SImode)
35263 emit_insn (gen_popcntdsi2 (dst, src));
35265 emit_insn (gen_popcntddi2 (dst, src));
35269 tmp1 = gen_reg_rtx (mode);
35271 if (mode == SImode)
35273 emit_insn (gen_popcntbsi2 (tmp1, src));
35274 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35276 tmp2 = force_reg (SImode, tmp2);
35277 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35281 emit_insn (gen_popcntbdi2 (tmp1, src));
35282 tmp2 = expand_mult (DImode, tmp1,
35283 GEN_INT ((HOST_WIDE_INT)
35284 0x01010101 << 32 | 0x01010101),
35286 tmp2 = force_reg (DImode, tmp2);
35287 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
35292 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35293 target, and SRC is the argument operand. */
35296 rs6000_emit_parity (rtx dst, rtx src)
35298 machine_mode mode = GET_MODE (dst);
35301 tmp = gen_reg_rtx (mode);
35303 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35306 if (mode == SImode)
35308 emit_insn (gen_popcntbsi2 (tmp, src));
35309 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35313 emit_insn (gen_popcntbdi2 (tmp, src));
35314 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35319 if (mode == SImode)
35321 /* Is mult+shift >= shift+xor+shift+xor? */
35322 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35324 rtx tmp1, tmp2, tmp3, tmp4;
35326 tmp1 = gen_reg_rtx (SImode);
35327 emit_insn (gen_popcntbsi2 (tmp1, src));
35329 tmp2 = gen_reg_rtx (SImode);
35330 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35331 tmp3 = gen_reg_rtx (SImode);
35332 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35334 tmp4 = gen_reg_rtx (SImode);
35335 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35336 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35339 rs6000_emit_popcount (tmp, src);
35340 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35344 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35345 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35347 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35349 tmp1 = gen_reg_rtx (DImode);
35350 emit_insn (gen_popcntbdi2 (tmp1, src));
35352 tmp2 = gen_reg_rtx (DImode);
35353 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35354 tmp3 = gen_reg_rtx (DImode);
35355 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35357 tmp4 = gen_reg_rtx (DImode);
35358 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35359 tmp5 = gen_reg_rtx (DImode);
35360 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35362 tmp6 = gen_reg_rtx (DImode);
35363 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35364 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35367 rs6000_emit_popcount (tmp, src);
35368 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
35372 /* Expand an Altivec constant permutation for little endian mode.
35373 OP0 and OP1 are the input vectors and TARGET is the output vector.
35374 SEL specifies the constant permutation vector.
35376 There are two issues: First, the two input operands must be
35377 swapped so that together they form a double-wide array in LE
35378 order. Second, the vperm instruction has surprising behavior
35379 in LE mode: it interprets the elements of the source vectors
35380 in BE mode ("left to right") and interprets the elements of
35381 the destination vector in LE mode ("right to left"). To
35382 correct for this, we must subtract each element of the permute
35383 control vector from 31.
35385 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35386 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35387 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35388 serve as the permute control vector. Then, in BE mode,
35392 places the desired result in vr9. However, in LE mode the
35393 vector contents will be
35395 vr10 = 00000003 00000002 00000001 00000000
35396 vr11 = 00000007 00000006 00000005 00000004
35398 The result of the vperm using the same permute control vector is
35400 vr9 = 05000000 07000000 01000000 03000000
35402 That is, the leftmost 4 bytes of vr10 are interpreted as the
35403 source for the rightmost 4 bytes of vr9, and so on.
35405 If we change the permute control vector to
35407         vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35415 vr9 = 00000006 00000004 00000002 00000000. */
35418 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
35419 const vec_perm_indices &sel)
35423 rtx constv, unspec;
35425 /* Unpack and adjust the constant selector. */
35426 for (i = 0; i < 16; ++i)
35428 unsigned int elt = 31 - (sel[i] & 31);
35429 perm[i] = GEN_INT (elt);
35432 /* Expand to a permute, swapping the inputs and using the
35433 adjusted selector. */
35435 op0 = force_reg (V16QImode, op0);
35437 op1 = force_reg (V16QImode, op1);
35439 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35440 constv = force_reg (V16QImode, constv);
35441 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35443 if (!REG_P (target))
35445 rtx tmp = gen_reg_rtx (V16QImode);
35446 emit_move_insn (tmp, unspec);
35450 emit_move_insn (target, unspec);
35453 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35454 permute control vector. But here it's not a constant, so we must
35455 generate a vector NAND or NOR to do the adjustment. */
35458 altivec_expand_vec_perm_le (rtx operands[4])
35460 rtx notx, iorx, unspec;
35461 rtx target = operands[0];
35462 rtx op0 = operands[1];
35463 rtx op1 = operands[2];
35464 rtx sel = operands[3];
35466 rtx norreg = gen_reg_rtx (V16QImode);
35467 machine_mode mode = GET_MODE (target);
35469 /* Get everything in regs so the pattern matches. */
35471 op0 = force_reg (mode, op0);
35473 op1 = force_reg (mode, op1);
35475 sel = force_reg (V16QImode, sel);
35476 if (!REG_P (target))
35477 tmp = gen_reg_rtx (mode);
35479 if (TARGET_P9_VECTOR)
35481 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
35486 /* Invert the selector with a VNAND if available, else a VNOR.
35487 The VNAND is preferred for future fusion opportunities. */
35488 notx = gen_rtx_NOT (V16QImode, sel);
35489 iorx = (TARGET_P8_VECTOR
35490 ? gen_rtx_IOR (V16QImode, notx, notx)
35491 : gen_rtx_AND (V16QImode, notx, notx));
35492 emit_insn (gen_rtx_SET (norreg, iorx));
35494 /* Permute with operands reversed and adjusted selector. */
35495 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35499 /* Copy into target, possibly by way of a register. */
35500 if (!REG_P (target))
35502 emit_move_insn (tmp, unspec);
35506 emit_move_insn (target, unspec);
/* NOTE(review): this region was damaged during extraction -- interior
   lines (braces, else-arms, several selector-simplification statements)
   are missing and stray source line numbers are embedded at the start of
   each line.  Code left byte-identical; only comments added.  Restore
   from a pristine copy of rs6000.c before compiling.  */
35509 /* Expand an Altivec constant permutation. Return true if we match
35510 an efficient implementation; false to fall back to VPERM.
35512 OP0 and OP1 are the input vectors and TARGET is the output vector.
35513 SEL specifies the constant permutation vector. */
35516 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
35517 const vec_perm_indices &sel)
/* Table of single-instruction merge/pack patterns; each entry pairs an
   ISA-availability mask with the insn code and the byte permutation it
   implements.  */
35519 struct altivec_perm_insn {
35520 HOST_WIDE_INT mask;
35521 enum insn_code impl;
35522 unsigned char perm[16];
35524 static const struct altivec_perm_insn patterns[] = {
35525 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35526 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35527 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35528 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35529 { OPTION_MASK_ALTIVEC,
35530 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35531 : CODE_FOR_altivec_vmrglb_direct),
35532 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35533 { OPTION_MASK_ALTIVEC,
35534 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35535 : CODE_FOR_altivec_vmrglh_direct),
35536 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35537 { OPTION_MASK_ALTIVEC,
35538 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35539 : CODE_FOR_altivec_vmrglw_direct),
35540 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35541 { OPTION_MASK_ALTIVEC,
35542 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35543 : CODE_FOR_altivec_vmrghb_direct),
35544 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35545 { OPTION_MASK_ALTIVEC,
35546 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35547 : CODE_FOR_altivec_vmrghh_direct),
35548 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35549 { OPTION_MASK_ALTIVEC,
35550 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35551 : CODE_FOR_altivec_vmrghw_direct),
35552 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35553 { OPTION_MASK_P8_VECTOR,
35554 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
35555 : CODE_FOR_p8_vmrgow_v4sf_direct),
35556 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35557 { OPTION_MASK_P8_VECTOR,
35558 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
35559 : CODE_FOR_p8_vmrgew_v4sf_direct),
35560 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35563 unsigned int i, j, elt, which;
35564 unsigned char perm[16];
35568 /* Unpack the constant selector. */
35569 for (i = which = 0; i < 16; ++i)
/* `which` accumulates which of the two inputs the selector uses:
   bit 0 for op0 bytes (<16), bit 1 for op1 bytes.  */
35572 which |= (elt < 16 ? 1 : 2);
35576 /* Simplify the constant selector based on operands. */
35580 gcc_unreachable ();
35584 if (!rtx_equal_p (op0, op1))
35589 for (i = 0; i < 16; ++i)
35601 /* Look for splat patterns. */
35606 for (i = 0; i < 16; ++i)
35607 if (perm[i] != elt)
35611 if (!BYTES_BIG_ENDIAN)
35613 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
/* Halfword splat: all byte pairs select the same halfword.  */
35619 for (i = 0; i < 16; i += 2)
35620 if (perm[i] != elt || perm[i + 1] != elt + 1)
35624 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35625 x = gen_reg_rtx (V8HImode);
35626 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35628 emit_move_insn (target, gen_lowpart (V16QImode, x));
/* Word splat: all byte quads select the same word.  */
35635 for (i = 0; i < 16; i += 4)
35637 || perm[i + 1] != elt + 1
35638 || perm[i + 2] != elt + 2
35639 || perm[i + 3] != elt + 3)
35643 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35644 x = gen_reg_rtx (V4SImode);
35645 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35647 emit_move_insn (target, gen_lowpart (V16QImode, x));
35653 /* Look for merge and pack patterns. */
35654 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35658 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35661 elt = patterns[j].perm[0];
35662 if (perm[0] == elt)
35664 else if (perm[0] == elt + 16)
35668 for (i = 1; i < 16; ++i)
35670 elt = patterns[j].perm[i];
35672 elt = (elt >= 16 ? elt - 16 : elt + 16);
35673 else if (one_vec && elt >= 16)
35675 if (perm[i] != elt)
35680 enum insn_code icode = patterns[j].impl;
35681 machine_mode omode = insn_data[icode].operand[0].mode;
35682 machine_mode imode = insn_data[icode].operand[1].mode;
35684 /* For little-endian, don't use vpkuwum and vpkuhum if the
35685 underlying vector type is not V4SI and V8HI, respectively.
35686 For example, using vpkuwum with a V8HI picks up the even
35687 halfwords (BE numbering) when the even halfwords (LE
35688 numbering) are what we need. */
35689 if (!BYTES_BIG_ENDIAN
35690 && icode == CODE_FOR_altivec_vpkuwum_direct
35692 && GET_MODE (op0) != V4SImode)
35694 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35696 if (!BYTES_BIG_ENDIAN
35697 && icode == CODE_FOR_altivec_vpkuhum_direct
35699 && GET_MODE (op0) != V8HImode)
35701 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35704 /* For little-endian, the two input operands must be swapped
35705 (or swapped back) to ensure proper right-to-left numbering
35707 if (swapped ^ !BYTES_BIG_ENDIAN)
35708 std::swap (op0, op1);
35709 if (imode != V16QImode)
35711 op0 = gen_lowpart (imode, op0);
35712 op1 = gen_lowpart (imode, op1);
35714 if (omode == V16QImode)
35717 x = gen_reg_rtx (omode);
35718 emit_insn (GEN_FCN (icode) (x, op0, op1));
35719 if (omode != V16QImode)
35720 emit_move_insn (target, gen_lowpart (V16QImode, x));
/* No single-insn match: fall back to a full vperm (LE-adjusted).  */
35725 if (!BYTES_BIG_ENDIAN)
35727 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
35734 /* Expand a VSX Permute Doubleword constant permutation.
35735 Return true if we match an efficient implementation. */
/* PERM0/PERM1 each select one doubleword of the concatenated <op0,op1>
   pair: bit 1 chooses the operand, bit 0 chooses the half.  When TARGET
   is NULL this function only tests whether the permutation can be done
   and emits nothing.  NOTE(review): source lines are elided here; the
   fold/swap branches below are only partially visible.  */
35738 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35739 unsigned char perm0, unsigned char perm1)
35743 /* If both selectors come from the same operand, fold to single op. */
35744 if ((perm0 & 2) == (perm1 & 2))
35751 /* If both operands are equal, fold to simpler permutation. */
35752 if (rtx_equal_p (op0, op1))
/* Force the second selector to reference "operand 1" so the canonical
   two-operand form is produced.  */
35755 perm1 = (perm1 & 1) + 2;
35757 /* If the first selector comes from the second operand, swap. */
35758 else if (perm0 & 2)
35764 std::swap (op0, op1);
35766 /* If the second selector does not come from the second operand, fail. */
35767 else if ((perm1 & 2) == 0)
35771 if (target != NULL)
35773 machine_mode vmode, dmode;
35776 vmode = GET_MODE (target);
35777 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
/* Build a 4-element double-wide concat of the operands and select two
   elements from it; this is the RTL form the VSX permute-doubleword
   pattern matches.  */
35778 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35779 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35780 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35781 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35782 emit_insn (gen_rtx_SET (target, x));
35787 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
/* VMODE is the vector mode, SEL the constant selector.  A null TARGET
   means "just test whether this permutation is supported".  */
35790 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
35791 rtx op1, const vec_perm_indices &sel)
35793 bool testing_p = !target;
35795 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35796 if (TARGET_ALTIVEC && testing_p)
35799 /* Check for ps_merge* or xxpermdi insns. */
35800 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
/* When only testing, substitute scratch virtual registers so the
   expander below has operands to work with.  */
35804 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35805 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
/* V2DF/V2DI selectors have exactly two indices.  */
35807 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
35811 if (TARGET_ALTIVEC)
35813 /* Force the target-independent code to lower to V16QImode. */
35814 if (vmode != V16QImode)
35816 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
35823 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
35824 OP0 and OP1 are the input vectors and TARGET is the output vector.
35825 PERM specifies the constant permutation vector. */
35828 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35829 machine_mode vmode, const vec_perm_builder &perm)
/* expand_vec_perm_const may return a register other than TARGET, so
   copy the result back when necessary.  */
35831 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
35833 emit_move_insn (target, x);
35836 /* Expand an extract even operation. */
/* TARGET receives the even-numbered elements of the concatenated
   <op0,op1> pair (indices 0, 2, 4, ...).  */
35839 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35841 machine_mode vmode = GET_MODE (target);
35842 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35843 vec_perm_builder perm (nelt, nelt, 1);
35845 for (i = 0; i < nelt; i++)
35846 perm.quick_push (i * 2);
35848 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
35851 /* Expand a vector interleave operation. */
/* HIGHP selects which halves of OP0/OP1 are interleaved into TARGET.
   NOTE(review): HIGHP maps to offset 0 -- element numbering here
   presumably follows big-endian convention; confirm against the
   callers.  */
35854 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35856 machine_mode vmode = GET_MODE (target);
35857 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35858 vec_perm_builder perm (nelt, nelt, 1);
35860 high = (highp ? 0 : nelt / 2);
35861 for (i = 0; i < nelt / 2; i++)
/* Alternate one element from op0 and one from op1; op1 elements live
   at indices nelt..2*nelt-1 in the combined selector space.  */
35863 perm.quick_push (i + high);
35864 perm.quick_push (i + nelt + high);
35867 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
35870 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35872 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35874 HOST_WIDE_INT hwi_scale (scale);
35875 REAL_VALUE_TYPE r_pow;
35876 rtvec v = rtvec_alloc (2);
35878 rtx scale_vec = gen_reg_rtx (V2DFmode);
/* Compute 2.0**SCALE once, splat it into both lanes of a V2DF
   constant, then implement the scaling as a vector multiply.  */
35879 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35880 elt = const_double_from_real_value (r_pow, DFmode);
35881 RTVEC_ELT (v, 0) = elt;
35882 RTVEC_ELT (v, 1) = elt;
35883 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35884 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
35887 /* Return an RTX representing where to find the function value of a
35888 function returning MODE. */
/* MODE is a complex mode; the value lands either in a single register
   (pair) or in a PARALLEL of two part-registers.  */
35890 rs6000_complex_function_value (machine_mode mode)
35892 unsigned int regno;
35894 machine_mode inner = GET_MODE_INNER (mode);
35895 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
/* IEEE 128-bit complex values are returned in AltiVec registers when
   the 128-bit float type is enabled.  */
35897 if (TARGET_FLOAT128_TYPE
35899 || (mode == TCmode && TARGET_IEEEQUAD)))
35900 regno = ALTIVEC_ARG_RETURN;
35902 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35903 regno = FP_ARG_RETURN;
35907 regno = GP_ARG_RETURN;
35909 /* 32-bit is OK since it'll go in r3/r4. */
35910 if (TARGET_32BIT && inner_bytes >= 4)
35911 return gen_rtx_REG (mode, regno);
35914 if (inner_bytes >= 8)
35915 return gen_rtx_REG (mode, regno);
/* Otherwise describe the real and imaginary parts as two consecutive
   registers, each tagged with its byte offset into the value.  */
35917 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35919 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35920 GEN_INT (inner_bytes));
35921 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35924 /* Return an rtx describing a return value of MODE as a PARALLEL
35925 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35926 stride REG_STRIDE. */
35929 rs6000_parallel_return (machine_mode mode,
35930 int n_elts, machine_mode elt_mode,
35931 unsigned int regno, unsigned int reg_stride)
35933 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35936 for (i = 0; i < n_elts; i++)
/* Each PARALLEL element pairs a hard register with its byte offset
   into the returned value.  */
35938 rtx r = gen_rtx_REG (elt_mode, regno);
35939 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35940 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35941 regno += reg_stride;
35947 /* Target hook for TARGET_FUNCTION_VALUE.
35949 An integer value is in r3 and a floating-point value is in fp1,
35950 unless -msoft-float. */
/* VALTYPE is the returned type; FN_DECL_OR_TYPE and OUTGOING are
   unused by this target.  Returns a REG or PARALLEL describing where
   callers find the value.  */
35953 rs6000_function_value (const_tree valtype,
35954 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35955 bool outgoing ATTRIBUTE_UNUSED)
35958 unsigned int regno;
35959 machine_mode elt_mode;
35962 /* Special handling for structs in darwin64. */
35964 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35966 CUMULATIVE_ARGS valcum;
35970 valcum.fregno = FP_ARG_MIN_REG;
35971 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35972 /* Do a trial code generation as if this were going to be passed as
35973 an argument; if any part goes in memory, we return NULL. */
35974 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35977 /* Otherwise fall through to standard ABI rules. */
35980 mode = TYPE_MODE (valtype);
35982 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35983 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35985 int first_reg, n_regs;
35987 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35989 /* _Decimal128 must use even/odd register pairs. */
35990 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
/* Number of FPRs per element, rounded up to 8-byte units.  */
35991 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35995 first_reg = ALTIVEC_ARG_RETURN;
35999 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
36002 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
36003 if (TARGET_32BIT && TARGET_POWERPC64)
36012 int count = GET_MODE_SIZE (mode) / 4;
36013 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
/* Promote small integers and pointers to full word mode.  */
36016 if ((INTEGRAL_TYPE_P (valtype)
36017 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
36018 || POINTER_TYPE_P (valtype))
36019 mode = TARGET_32BIT ? SImode : DImode;
36021 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36022 /* _Decimal128 must use an even/odd register pair. */
36023 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36024 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
36025 && !FLOAT128_VECTOR_P (mode))
36026 regno = FP_ARG_RETURN;
36027 else if (TREE_CODE (valtype) == COMPLEX_TYPE
36028 && targetm.calls.split_complex_arg)
36029 return rs6000_complex_function_value (mode);
36030 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36031 return register is used in both cases, and we won't see V2DImode/V2DFmode
36032 for pure altivec, combine the two cases. */
36033 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
36034 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
36035 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36036 regno = ALTIVEC_ARG_RETURN;
36038 regno = GP_ARG_RETURN;
36040 return gen_rtx_REG (mode, regno);
36043 /* Define how to find the value returned by a library function
36044 assuming the value has mode MODE. */
/* Like rs6000_function_value, but with only a mode to go on (no tree
   type), as used for libcalls.  */
36046 rs6000_libcall_value (machine_mode mode)
36048 unsigned int regno;
36050 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
36051 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
36052 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
36054 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36055 /* _Decimal128 must use an even/odd register pair. */
36056 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36057 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
36058 regno = FP_ARG_RETURN;
36059 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36060 return register is used in both cases, and we won't see V2DImode/V2DFmode
36061 for pure altivec, combine the two cases. */
36062 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
36063 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
36064 regno = ALTIVEC_ARG_RETURN;
36065 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
36066 return rs6000_complex_function_value (mode);
36068 regno = GP_ARG_RETURN;
36070 return gen_rtx_REG (mode, regno);
36073 /* Compute register pressure classes. We implement the target hook to avoid
36074 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
36075 lead to incorrect estimates of number of available registers and therefore
36076 increased register pressure/spill. */
/* Fills PRESSURE_CLASSES and returns the count; classes are added
   conditionally on the enabled register sets.  */
36078 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
36083 pressure_classes[n++] = GENERAL_REGS;
36085 pressure_classes[n++] = VSX_REGS;
36088 if (TARGET_ALTIVEC)
36089 pressure_classes[n++] = ALTIVEC_REGS;
36090 if (TARGET_HARD_FLOAT)
36091 pressure_classes[n++] = FLOAT_REGS;
36093 pressure_classes[n++] = CR_REGS;
36094 pressure_classes[n++] = SPECIAL_REGS;
36099 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36100 Frame pointer elimination is automatically handled.
36102 For the RS/6000, if frame pointer elimination is being done, we would like
36103 to convert ap into fp, not sp.
36105 We need r30 if -mminimal-toc was specified, and there are constant pool
36109 rs6000_can_eliminate (const int from, const int to)
/* ap -> sp is only valid when no frame pointer is needed; the TOC
   register can be eliminated unless -mminimal-toc requires it for a
   non-empty constant pool.  */
36111 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36112 ? ! frame_pointer_needed
36113 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36114 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
36115 || constant_pool_empty_p ()
36119 /* Define the offset between two registers, FROM to be eliminated and its
36120 replacement TO, at the start of a routine. */
36122 rs6000_initial_elimination_offset (int from, int to)
36124 rs6000_stack_t *info = rs6000_stack_info ();
36125 HOST_WIDE_INT offset;
36127 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36128 offset = info->push_p ? 0 : -info->total_size;
36129 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36131 offset = info->push_p ? 0 : -info->total_size;
/* With a downward-growing frame the soft frame pointer sits above
   the fixed area, locals and parameter save area.  */
36132 if (FRAME_GROWS_DOWNWARD)
36133 offset += info->fixed_size + info->vars_size + info->parm_size;
36135 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36136 offset = FRAME_GROWS_DOWNWARD
36137 ? info->fixed_size + info->vars_size + info->parm_size
36139 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36140 offset = info->total_size;
36141 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36142 offset = info->push_p ? info->total_size : 0;
36143 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
/* Any other combination is a bug in the caller.  */
36146 gcc_unreachable ();
36151 /* Fill in sizes of registers used by unwinder. */
/* ADDRESS is a tree for the table the unwinder indexes by DWARF
   column; each AltiVec column gets size 16 stored into it.  */
36154 rs6000_init_dwarf_reg_sizes_extra (tree address)
36156 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36159 machine_mode mode = TYPE_MODE (char_type_node);
36160 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36161 rtx mem = gen_rtx_MEM (BLKmode, addr);
36162 rtx value = gen_int_mode (16, mode);
36164 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36165 The unwinder still needs to know the size of Altivec registers. */
36167 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36169 int column = DWARF_REG_TO_UNWIND_COLUMN
36170 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36171 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36173 emit_move_insn (adjust_address (mem, mode, offset), value);
36178 /* Map internal gcc register numbers to debug format register numbers.
36179 FORMAT specifies the type of debug register number to use:
36180 0 -- debug information, except for frame-related sections
36181 1 -- DWARF .debug_frame section
36182 2 -- DWARF .eh_frame section */
36185 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36187 /* Except for the above, we use the internal number for non-DWARF
36188 debug information, and also for .eh_frame. */
36189 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36192 /* On some platforms, we use the standard DWARF register
36193 numbering for .debug_info and .debug_frame. */
36194 #ifdef RS6000_USE_DWARF_NUMBERING
36197 if (regno == LR_REGNO)
36199 if (regno == CTR_REGNO)
36201 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36202 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36203 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36204 to the DWARF reg for CR. */
36205 if (format == 1 && regno == CR2_REGNO)
/* CR fields map to DWARF columns 86..93.  */
36207 if (CR_REGNO_P (regno))
36208 return regno - CR0_REGNO + 86;
36209 if (regno == CA_REGNO)
36210 return 101; /* XER */
/* AltiVec registers occupy DWARF columns starting at 1124.  */
36211 if (ALTIVEC_REGNO_P (regno))
36212 return regno - FIRST_ALTIVEC_REGNO + 1124;
36213 if (regno == VRSAVE_REGNO)
36215 if (regno == VSCR_REGNO)
36221 /* target hook eh_return_filter_mode */
/* The EH filter value is an SImode quantity on 32-bit targets and a
   full word otherwise.  */
36222 static scalar_int_mode
36223 rs6000_eh_return_filter_mode (void)
36225 return TARGET_32BIT ? SImode : word_mode;
36228 /* Target hook for translate_mode_attribute. */
/* When the requested 128-bit float mode is the one backing long
   double, translate it to the TFmode/TCmode family so mode attributes
   behave consistently.  */
36229 static machine_mode
36230 rs6000_translate_mode_attribute (machine_mode mode)
36232 if ((FLOAT128_IEEE_P (mode)
36233 && ieee128_float_type_node == long_double_type_node)
36234 || (FLOAT128_IBM_P (mode)
36235 && ibm128_float_type_node == long_double_type_node)
36236 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
36240 /* Target hook for scalar_mode_supported_p. */
36242 rs6000_scalar_mode_supported_p (scalar_mode mode)
36244 /* -m32 does not support TImode. This is the default, from
36245 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36246 same ABI as for -m32. But default_scalar_mode_supported_p allows
36247 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36248 for -mpowerpc64. */
36249 if (TARGET_32BIT && mode == TImode)
36252 if (DECIMAL_FLOAT_MODE_P (mode))
36253 return default_decimal_float_supported_p ();
/* KFmode (IEEE 128) and IFmode (IBM double-double) require the
   128-bit float type support.  */
36254 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36257 return default_scalar_mode_supported_p (mode);
36260 /* Target hook for vector_mode_supported_p. */
36262 rs6000_vector_mode_supported_p (machine_mode mode)
36264 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36265 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36267 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36274 /* Target hook for floatn_mode. */
/* Returns the mode for _FloatN / _FloatNx types.  N=128 maps to
   TFmode when long double is IEEE, else to KFmode, and only when the
   128-bit float type is enabled.  NOTE(review): source is elided here;
   the switch framing around these cases is not visible.  */
36275 static opt_scalar_float_mode
36276 rs6000_floatn_mode (int n, bool extended)
36286 if (TARGET_FLOAT128_TYPE)
36287 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36289 return opt_scalar_float_mode ();
36292 return opt_scalar_float_mode ();
36295 /* Those are the only valid _FloatNx types. */
36296 gcc_unreachable ();
36310 if (TARGET_FLOAT128_TYPE)
36311 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36313 return opt_scalar_float_mode ();
36316 return opt_scalar_float_mode ();
36322 /* Target hook for c_mode_for_suffix. */
/* 'q'/'Q' literal suffixes denote the 128-bit float type when it is
   available.  */
36323 static machine_mode
36324 rs6000_c_mode_for_suffix (char suffix)
36326 if (TARGET_FLOAT128_TYPE)
36328 if (suffix == 'q' || suffix == 'Q')
36329 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36331 /* At the moment, we are not defining a suffix for IBM extended double.
36332 If/when the default for -mabi=ieeelongdouble is changed, and we want
36333 to support __ibm128 constants in legacy library code, we may need to
36334 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36335 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36336 __float80 constants. */
36342 /* Target hook for invalid_arg_for_unprototyped_fn. */
/* Returns a diagnostic string when an AltiVec vector VAL is passed to
   an unprototyped non-builtin function; NULL otherwise.  */
36343 static const char *
36344 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36346 return (!rs6000_darwin64_abi
36348 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36349 && (funcdecl == NULL_TREE
36350 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36351 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36352 ? N_("AltiVec argument passed to unprototyped function")
36356 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36357 setup by using __stack_chk_fail_local hidden function instead of
36358 calling __stack_chk_fail directly. Otherwise it is better to call
36359 __stack_chk_fail directly. */
/* Returns the tree for the stack-protector failure routine to call.  */
36361 static tree ATTRIBUTE_UNUSED
36362 rs6000_stack_protect_fail (void)
36364 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36365 ? default_hidden_stack_protect_fail ()
36366 : default_external_stack_protect_fail ();
36369 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* AddressSanitizer shadow memory offset: 1<<41 for 64-bit, 1<<29 for
   32-bit targets.  */
36372 static unsigned HOST_WIDE_INT
36373 rs6000_asan_shadow_offset (void)
36375 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36379 /* Mask options that we want to support inside of attribute((target)) and
36380 #pragma GCC target operations. Note, we do not include things like
36381 64/32-bit, endianness, hard/soft floating point, etc. that would have
36382 different calling sequences. */
36384 struct rs6000_opt_mask {
36385 const char *name; /* option name */
36386 HOST_WIDE_INT mask; /* mask to set */
36387 bool invert; /* invert sense of mask */
36388 bool valid_target; /* option is a target option */
/* Table mapping option names to ISA flag masks.  Entries with
   valid_target == false are recognized but rejected inside
   attribute((target)); the #ifdef'd entries exist only on configs
   that define the corresponding mask.  */
36391 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36393 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36394 { "cmpb", OPTION_MASK_CMPB, false, true },
36395 { "crypto", OPTION_MASK_CRYPTO, false, true },
36396 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36397 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36398 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36400 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
36401 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
36402 { "fprnd", OPTION_MASK_FPRND, false, true },
36403 { "hard-dfp", OPTION_MASK_DFP, false, true },
36404 { "htm", OPTION_MASK_HTM, false, true },
36405 { "isel", OPTION_MASK_ISEL, false, true },
36406 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36407 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36408 { "modulo", OPTION_MASK_MODULO, false, true },
36409 { "mulhw", OPTION_MASK_MULHW, false, true },
36410 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36411 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36412 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36413 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36414 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36415 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36416 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36417 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36418 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36419 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36420 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36421 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36422 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36423 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36424 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
/* "string" has a zero mask: accepted for compatibility, no effect.  */
36425 { "string", 0, false, true },
36426 { "update", OPTION_MASK_NO_UPDATE, true , true },
36427 { "vsx", OPTION_MASK_VSX, false, true },
36428 #ifdef OPTION_MASK_64BIT
36430 { "aix64", OPTION_MASK_64BIT, false, false },
36431 { "aix32", OPTION_MASK_64BIT, true, false },
36433 { "64", OPTION_MASK_64BIT, false, false },
36434 { "32", OPTION_MASK_64BIT, true, false },
36437 #ifdef OPTION_MASK_EABI
36438 { "eabi", OPTION_MASK_EABI, false, false },
36440 #ifdef OPTION_MASK_LITTLE_ENDIAN
36441 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36442 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36444 #ifdef OPTION_MASK_RELOCATABLE
36445 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36447 #ifdef OPTION_MASK_STRICT_ALIGN
36448 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36450 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36451 { "string", 0, false, false },
36454 /* Builtin mask mapping for printing the flags. */
/* Used only for debug output; maps RS6000_BTM_* builtin-mask bits to
   human-readable names.  */
36455 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36457 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36458 { "vsx", RS6000_BTM_VSX, false, false },
36459 { "fre", RS6000_BTM_FRE, false, false },
36460 { "fres", RS6000_BTM_FRES, false, false },
36461 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36462 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36463 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36464 { "cell", RS6000_BTM_CELL, false, false },
36465 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36466 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36467 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36468 { "crypto", RS6000_BTM_CRYPTO, false, false },
36469 { "htm", RS6000_BTM_HTM, false, false },
36470 { "hard-dfp", RS6000_BTM_DFP, false, false },
36471 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36472 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36473 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
36474 { "float128", RS6000_BTM_FLOAT128, false, false },
36475 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
36478 /* Option variables that we want to support inside attribute((target)) and
36479 #pragma GCC target operations. */
36481 struct rs6000_opt_var {
36482 const char *name; /* option name */
36483 size_t global_offset; /* offset of the option in global_options. */
36484 size_t target_offset; /* offset of the option in target options. */
/* Boolean option variables addressable by byte offset into
   global_options / cl_target_option; written directly by
   rs6000_inner_target_options.  NOTE(review): source lines are elided;
   some entry names are not visible here.  */
36487 static struct rs6000_opt_var const rs6000_opt_vars[] =
36490 offsetof (struct gcc_options, x_TARGET_FRIZ),
36491 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36492 { "avoid-indexed-addresses",
36493 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36494 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36496 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36497 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36498 { "optimize-swaps",
36499 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36500 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36501 { "allow-movmisalign",
36502 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36503 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36505 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36506 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36508 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36509 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36510 { "align-branch-targets",
36511 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36512 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36514 offsetof (struct gcc_options, x_tls_markers),
36515 offsetof (struct cl_target_option, x_tls_markers), },
36517 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36518 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36520 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36521 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36522 { "speculate-indirect-jumps",
36523 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
36524 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
36527 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36528 parsing. Return true if there were no errors. */
/* ARGS is a STRING_CST of comma-separated options or a TREE_LIST of
   such strings; ATTR_P distinguishes attribute syntax from #pragma
   syntax for diagnostics.  */
36531 rs6000_inner_target_options (tree args, bool attr_p)
36535 if (args == NULL_TREE)
36538 else if (TREE_CODE (args) == STRING_CST)
36540 char *p = ASTRDUP (TREE_STRING_POINTER (args));
/* strtok mutates the duplicated string; each token q is one option.  */
36543 while ((q = strtok (p, ",")) != NULL)
36545 bool error_p = false;
36546 bool not_valid_p = false;
36547 const char *cpu_opt = NULL;
36550 if (strncmp (q, "cpu=", 4) == 0)
36552 int cpu_index = rs6000_cpu_name_lookup (q+4);
36553 if (cpu_index >= 0)
36554 rs6000_cpu_index = cpu_index;
36561 else if (strncmp (q, "tune=", 5) == 0)
36563 int tune_index = rs6000_cpu_name_lookup (q+5);
36564 if (tune_index >= 0)
36565 rs6000_tune_index = tune_index;
36575 bool invert = false;
/* A "no-" prefix inverts the option's sense.  */
36579 if (strncmp (r, "no-", 3) == 0)
36585 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36586 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36588 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36590 if (!rs6000_opt_masks[i].valid_target)
36591 not_valid_p = true;
36595 rs6000_isa_flags_explicit |= mask;
36597 /* VSX needs altivec, so -mvsx automagically sets
36598 altivec and disables -mavoid-indexed-addresses. */
36601 if (mask == OPTION_MASK_VSX)
36603 mask |= OPTION_MASK_ALTIVEC;
36604 TARGET_AVOID_XFORM = 0;
36608 if (rs6000_opt_masks[i].invert)
36612 rs6000_isa_flags &= ~mask;
36614 rs6000_isa_flags |= mask;
/* Not a mask option; try the variable-option table next.  */
36619 if (error_p && !not_valid_p)
36621 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36622 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36624 size_t j = rs6000_opt_vars[i].global_offset;
36625 *((int *) ((char *)&global_options + j)) = !invert;
36627 not_valid_p = false;
/* Diagnostics name the syntax the option came from.  */
36635 const char *eprefix, *esuffix;
36640 eprefix = "__attribute__((__target__(";
36645 eprefix = "#pragma GCC target ";
36650 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36652 else if (not_valid_p)
36653 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36655 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36660 else if (TREE_CODE (args) == TREE_LIST)
/* Recurse over each list element.  */
36664 tree value = TREE_VALUE (args);
36667 bool ret2 = rs6000_inner_target_options (value, attr_p);
36671 args = TREE_CHAIN (args);
36673 while (args != NULL_TREE);
36678 error ("attribute %<target%> argument not a string");
36685 /* Print out the target options as a list for -mdebug=target. */
/* Mirrors the structure of rs6000_inner_target_options: STRING_CST is
   split on commas, TREE_LIST is walked recursively.  */
36688 rs6000_debug_target_options (tree args, const char *prefix)
36690 if (args == NULL_TREE)
36691 fprintf (stderr, "%s<NULL>", prefix)
36693 else if (TREE_CODE (args) == STRING_CST)
36695 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36698 while ((q = strtok (p, ",")) != NULL)
36701 fprintf (stderr, "%s\"%s\"", prefix, q);
36706 else if (TREE_CODE (args) == TREE_LIST)
36710 tree value = TREE_VALUE (args);
36713 rs6000_debug_target_options (value, prefix);
36716 args = TREE_CHAIN (args);
36718 while (args != NULL_TREE);
36722 gcc_unreachable ();
36728 /* Hook to validate attribute((target("..."))). */
/* FNDECL is the decl being decorated, ARGS the attribute arguments.
   Parses the options into the global state, records the resulting
   target/optimization nodes on FNDECL, then restores the previous
   global state.  Returns true on success.  */
36731 rs6000_valid_attribute_p (tree fndecl,
36732 tree ARG_UNUSED (name),
36736 struct cl_target_option cur_target;
36739 tree new_target, new_optimize;
36740 tree func_optimize;
36742 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36744 if (TARGET_DEBUG_TARGET)
36746 tree tname = DECL_NAME (fndecl);
36747 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36749 fprintf (stderr, "function: %.*s\n",
36750 (int) IDENTIFIER_LENGTH (tname),
36751 IDENTIFIER_POINTER (tname));
36753 fprintf (stderr, "function: unknown\n");
36755 fprintf (stderr, "args:");
36756 rs6000_debug_target_options (args, " ");
36757 fprintf (stderr, "\n");
36760 fprintf (stderr, "flags: 0x%x\n", flags);
36762 fprintf (stderr, "--------------------\n");
36765 /* attribute((target("default"))) does nothing, beyond
36766 affecting multi-versioning. */
36767 if (TREE_VALUE (args)
36768 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36769 && TREE_CHAIN (args) == NULL_TREE
36770 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36773 old_optimize = build_optimization_node (&global_options);
36774 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36776 /* If the function changed the optimization levels as well as setting target
36777 options, start with the optimizations specified. */
36778 if (func_optimize && func_optimize != old_optimize)
36779 cl_optimization_restore (&global_options,
36780 TREE_OPTIMIZATION (func_optimize));
36782 /* The target attributes may also change some optimization flags, so update
36783 the optimization options if necessary. */
36784 cl_target_option_save (&cur_target, &global_options);
/* -1 means "not set by this attribute"; the inner parser may fill
   these in from cpu=/tune= options.  */
36785 rs6000_cpu_index = rs6000_tune_index = -1;
36786 ret = rs6000_inner_target_options (args, true);
36788 /* Set up any additional state. */
36791 ret = rs6000_option_override_internal (false);
36792 new_target = build_target_option_node (&global_options);
36797 new_optimize = build_optimization_node (&global_options);
36804 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36806 if (old_optimize != new_optimize)
36807 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary changes made while parsing.  */
36810 cl_target_option_restore (&global_options, &cur_target);
36812 if (old_optimize != new_optimize)
36813 cl_optimization_restore (&global_options,
36814 TREE_OPTIMIZATION (old_optimize));
36820 /* Hook to validate the current #pragma GCC target and set the state, and
36821 update the macros based on what was changed. If ARGS is NULL, then
36822 POP_TARGET is used to reset the options. */
/* NOTE(review): this extraction has dropped source lines (the embedded
   original line numbers are non-contiguous), so the return type, braces and
   several statements are missing from this view.  Comments describe only
   the statements that are visible.  */
36825 rs6000_pragma_target_parse (tree args, tree pop_target)
36827 tree prev_tree = build_target_option_node (&global_options);
36829 struct cl_target_option *prev_opt, *cur_opt;
36830 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36831 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
/* Optional debug dump of the pragma arguments (or the popped target).  */
36833 if (TARGET_DEBUG_TARGET)
36835 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36836 fprintf (stderr, "args:");
36837 rs6000_debug_target_options (args, " ");
36838 fprintf (stderr, "\n");
36842 fprintf (stderr, "pop_target:\n");
36843 debug_tree (pop_target);
36846 fprintf (stderr, "pop_target: <NULL>\n");
36848 fprintf (stderr, "--------------------\n");
/* Start from the popped target node when present, otherwise from the
   default target node (the TRUE arm of this conditional is one of the
   dropped lines).  */
36853 cur_tree = ((pop_target)
36855 : target_option_default_node);
36856 cl_target_option_restore (&global_options,
36857 TREE_TARGET_OPTION (cur_tree));
/* Re-parse the pragma options from scratch; an invalid pragma is
   diagnosed below when debugging is enabled.  */
36861 rs6000_cpu_index = rs6000_tune_index = -1;
36862 if (!rs6000_inner_target_options (args, false)
36863 || !rs6000_option_override_internal (false)
36864 || (cur_tree = build_target_option_node (&global_options))
36867 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36868 fprintf (stderr, "invalid pragma\n");
/* Commit the new target node and activate its globals.  */
36874 target_option_current_node = cur_tree;
36875 rs6000_activate_target_options (target_option_current_node);
36877 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36878 change the macros that are defined. */
36879 if (rs6000_target_modify_macros_ptr)
36881 prev_opt = TREE_TARGET_OPTION (prev_tree);
36882 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36883 prev_flags = prev_opt->x_rs6000_isa_flags;
36885 cur_opt = TREE_TARGET_OPTION (cur_tree);
36886 cur_flags = cur_opt->x_rs6000_isa_flags;
36887 cur_bumask = cur_opt->x_rs6000_builtin_mask;
/* XOR the old and new masks to find the bits that changed in either
   direction; only those macros need deleting/defining.  */
36889 diff_bumask = (prev_bumask ^ cur_bumask);
36890 diff_flags = (prev_flags ^ cur_flags);
36892 if ((diff_flags != 0) || (diff_bumask != 0))
36894 /* Delete old macros. */
36895 rs6000_target_modify_macros_ptr (false,
36896 prev_flags & diff_flags,
36897 prev_bumask & diff_bumask);
36899 /* Define new macros. */
36900 rs6000_target_modify_macros_ptr (true,
36901 cur_flags & diff_flags,
36902 cur_bumask & diff_bumask);
36910 /* Remember the last target of rs6000_set_current_function. */
/* Cleared (invalidated) by rs6000_activate_target_options below.  */
36911 static GTY(()) tree rs6000_previous_fndecl;
36913 /* Restore target's globals from NEW_TREE and invalidate the
36914 rs6000_previous_fndecl cache. */
/* NOTE(review): the function's return type line and braces were dropped by
   the extraction; line-number gaps below mark further missing code
   (presumably the final `else` arm before line 36925 — confirm against the
   upstream file).  */
36917 rs6000_activate_target_options (tree new_tree)
36919 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
/* Prefer globals already saved on the node; fall back to the defaults
   when NEW_TREE is the default target node.  */
36920 if (TREE_TARGET_GLOBALS (new_tree))
36921 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36922 else if (new_tree == target_option_default_node)
36923 restore_target_globals (&default_target_globals);
36925 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
/* Invalidate the cache so the next rs6000_set_current_function call
   re-derives the target context.  */
36926 rs6000_previous_fndecl = NULL_TREE;
36929 /* Establish appropriate back-end context for processing the function
36930 FNDECL. The argument might be NULL to indicate processing at top
36931 level, outside of any function scope. */
/* NOTE(review): return type, braces and some statements are missing from
   this extraction (non-contiguous embedded line numbers).  */
36933 rs6000_set_current_function (tree fndecl)
36935 if (TARGET_DEBUG_TARGET)
36937 fprintf (stderr, "\n==================== rs6000_set_current_function");
36940 fprintf (stderr, ", fndecl %s (%p)",
36941 (DECL_NAME (fndecl)
36942 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36943 : "<unknown>"), (void *)fndecl);
36945 if (rs6000_previous_fndecl)
36946 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36948 fprintf (stderr, "\n");
36951 /* Only change the context if the function changes. This hook is called
36952 several times in the course of compiling a function, and we don't want to
36953 slow things down too much or call target_reinit when it isn't safe. */
36954 if (fndecl == rs6000_previous_fndecl)
/* Determine the target node that is currently in effect (OLD_TREE).  */
36958 if (rs6000_previous_fndecl == NULL_TREE)
36959 old_tree = target_option_current_node;
36960 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
36961 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
36963 old_tree = target_option_default_node;
/* Determine the target node to switch to (NEW_TREE).  At top level
   (FNDECL null) only switch if the current node differs.  */
36966 if (fndecl == NULL_TREE)
36968 if (old_tree != target_option_current_node)
36969 new_tree = target_option_current_node;
36971 new_tree = NULL_TREE;
36975 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36976 if (new_tree == NULL_TREE)
36977 new_tree = target_option_default_node;
36980 if (TARGET_DEBUG_TARGET)
36984 fprintf (stderr, "\nnew fndecl target specific options:\n");
36985 debug_tree (new_tree);
36990 fprintf (stderr, "\nold fndecl target specific options:\n");
36991 debug_tree (old_tree);
36994 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
36995 fprintf (stderr, "--------------------\n");
/* Only reconfigure the back end when the target actually changes.  */
36998 if (new_tree && old_tree != new_tree)
36999 rs6000_activate_target_options (new_tree);
37002 rs6000_previous_fndecl = fndecl;
37006 /* Save the current options */
/* Copies the ISA flag state from OPTS into PTR (TARGET_OPTION_SAVE hook).
   NOTE(review): return type and braces dropped by the extraction.  */
37009 rs6000_function_specific_save (struct cl_target_option *ptr,
37010 struct gcc_options *opts)
37012 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
37013 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
37016 /* Restore the current options */
/* Inverse of rs6000_function_specific_save: copies the saved ISA flags
   from PTR back into OPTS, then re-runs option overriding so dependent
   state is recomputed.  NOTE(review): return type and braces dropped by
   the extraction.  */
37019 rs6000_function_specific_restore (struct gcc_options *opts,
37020 struct cl_target_option *ptr)
37023 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
37024 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
37025 (void) rs6000_option_override_internal (false);
37028 /* Print the current options */
/* Dumps both the effective and the explicitly-set ISA flag masks from PTR
   to FILE at the given INDENT (TARGET_OPTION_PRINT hook).  */
37031 rs6000_function_specific_print (FILE *file, int indent,
37032 struct cl_target_option *ptr)
37034 rs6000_print_isa_options (file, indent, "Isa options set",
37035 ptr->x_rs6000_isa_flags);
37037 rs6000_print_isa_options (file, indent, "Isa options explicit",
37038 ptr->x_rs6000_isa_flags_explicit);
37041 /* Helper function to print the current isa or misc options on a line. */
/* Prints STRING followed by the option names from OPTS[0..NUM_ELEMENTS)
   selected by FLAGS, each prefixed by PREFIX (e.g. "-m"), wrapping at
   max_column.  NOTE(review): extraction dropped lines here (declarations
   of i/cur_column, the "indent" parameter line, several braces and
   `continue`/else paths) — numbering gaps mark the missing code.  */
37044 rs6000_print_options_internal (FILE *file,
37046 const char *string,
37047 HOST_WIDE_INT flags,
37048 const char *prefix,
37049 const struct rs6000_opt_mask *opts,
37050 size_t num_elements)
37053 size_t start_column = 0;
37055 size_t max_column = 120;
37056 size_t prefix_len = strlen (prefix);
37057 size_t comma_len = 0;
37058 const char *comma = "";
37061 start_column += fprintf (file, "%*s", indent, "");
37065 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
37069 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
37071 /* Print the various mask options. */
37072 cur_column = start_column;
37073 for (i = 0; i < num_elements; i++)
37075 bool invert = opts[i].invert;
37076 const char *name = opts[i].name;
37077 const char *no_str = "";
37078 HOST_WIDE_INT mask = opts[i].mask;
37079 size_t len = comma_len + prefix_len + strlen (name);
/* Account for a "no-" prefix when the flag state calls for the negated
   spelling (the assignments of no_str sit in dropped lines).  */
37083 if ((flags & mask) == 0)
37086 len += sizeof ("no-") - 1;
37094 if ((flags & mask) != 0)
37097 len += sizeof ("no-") - 1;
/* Wrap with a backslash-continuation when the line would overflow.  */
37104 if (cur_column > max_column)
37106 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
37107 cur_column = start_column + len;
37111 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37113 comma_len = sizeof (", ") - 1;
37116 fputs ("\n", file);
37119 /* Helper function to print the current isa options on a line. */
/* Thin wrapper over rs6000_print_options_internal using the "-m" prefix
   and the rs6000_opt_masks table.  */
37122 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37123 HOST_WIDE_INT flags)
37125 rs6000_print_options_internal (file, indent, string, flags, "-m",
37126 &rs6000_opt_masks[0],
37127 ARRAY_SIZE (rs6000_opt_masks));
/* Print the current builtin-mask options; like rs6000_print_isa_options
   but with no prefix and the rs6000_builtin_mask_names table.
   NOTE(review): the function's leading comment/return-type lines were
   dropped by the extraction.  */
37131 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37132 HOST_WIDE_INT flags)
37134 rs6000_print_options_internal (file, indent, string, flags, "",
37135 &rs6000_builtin_mask_names[0],
37136 ARRAY_SIZE (rs6000_builtin_mask_names));
37139 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
37140 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37141 -mupper-regs-df, etc.).
37143 If the user used -mno-power8-vector, we need to turn off all of the implicit
37144 ISA 2.07 and 3.0 options that relate to the vector unit.
37146 If the user used -mno-power9-vector, we need to turn off all of the implicit
37147 ISA 3.0 options that relate to the vector unit.
37149 This function does not handle explicit options such as the user specifying
37150 -mdirect-move. These are handled in rs6000_option_override_internal, and
37151 the appropriate error is given if needed.
37153 We return a mask of all of the implicit options that should not be enabled
37156 static HOST_WIDE_INT
37157 rs6000_disable_incompatible_switches (void)
37159 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
/* Table of negatable master switches and the implicit option masks that
   depend on each; ordered most-specific first (power9 before power8
   before vsx).  */
37162 static const struct {
37163 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
37164 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
37165 const char *const name; /* name of the switch. */
37167 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
37168 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
37169 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
37172 for (i = 0; i < ARRAY_SIZE (flags); i++)
37174 HOST_WIDE_INT no_flag = flags[i].no_flag;
/* Only act when the user explicitly turned the master switch off.  */
37176 if ((rs6000_isa_flags & no_flag) == 0
37177 && (rs6000_isa_flags_explicit & no_flag) != 0)
37179 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
37180 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
/* Diagnose every dependent option the user also explicitly enabled;
   each iteration clears the bits it reported so the loop terminates
   with set_flags == 0 (asserted below).  */
37186 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
37187 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
37189 set_flags &= ~rs6000_opt_masks[j].mask;
37190 error ("%<-mno-%s%> turns off %<-m%s%>",
37192 rs6000_opt_masks[j].name);
37195 gcc_assert (!set_flags);
/* Drop the implicit dependents and remember not to re-enable them.  */
37198 rs6000_isa_flags &= ~dep_flags;
37199 ignore_masks |= no_flag | dep_flags;
37203 return ignore_masks;
37207 /* Helper function for printing the function name when debugging. */
/* Returns the identifier of FN, or "<no-name>" when it has none.
   NOTE(review): the extraction dropped some lines (opening brace and,
   presumably, a null-FN guard) between lines 37210 and 37217.  */
37209 static const char *
37210 get_decl_name (tree fn)
37217 name = DECL_NAME (fn);
37219 return "<no-name>";
37221 return IDENTIFIER_POINTER (name);
37224 /* Return the clone id of the target we are compiling code for in a target
37225 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37226 the priority list for the target clones (ordered from lowest to
/* NOTE(review): extraction gaps — the comment tail, return type and braces
   around the "default" check are missing from this view.  */
37230 rs6000_clone_priority (tree fndecl)
37232 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37233 HOST_WIDE_INT isa_masks;
37234 int ret = CLONE_DEFAULT;
37235 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37236 const char *attrs_str = NULL;
37238 attrs = TREE_VALUE (TREE_VALUE (attrs));
37239 attrs_str = TREE_STRING_POINTER (attrs);
37241 /* Return priority zero for default function. Return the ISA needed for the
37242 function if it is not the default. */
37243 if (strcmp (attrs_str, "default") != 0)
37245 if (fn_opts == NULL_TREE)
37246 fn_opts = target_option_default_node;
37248 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37249 isa_masks = rs6000_isa_flags;
37251 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
/* Scan from the highest clone id down; the first clone whose ISA mask
   intersects the function's ISA flags wins.  */
37253 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37254 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37258 if (TARGET_DEBUG_TARGET)
37259 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
37260 get_decl_name (fndecl), ret);
37265 /* This compares the priority of target features in function DECL1 and DECL2.
37266 It returns positive value if DECL1 is higher priority, negative value if
37267 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37268 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
/* NOTE(review): return type, braces and the final `return ret;` line were
   dropped by the extraction.  */
37271 rs6000_compare_version_priority (tree decl1, tree decl2)
37273 int priority1 = rs6000_clone_priority (decl1);
37274 int priority2 = rs6000_clone_priority (decl2);
/* Simple difference gives the required sign convention.  */
37275 int ret = priority1 - priority2;
37277 if (TARGET_DEBUG_TARGET)
37278 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37279 get_decl_name (decl1), get_decl_name (decl2), ret);
37284 /* Make a dispatcher declaration for the multi-versioned function DECL.
37285 Calls to DECL function will be replaced with calls to the dispatcher
37286 by the front-end. Returns the decl of the dispatcher function. */
/* NOTE(review): extraction gaps — return type, several braces, the
   assignment initializing first_v, and the #else/#endif structure around
   the GLIBC error are missing from this view.  */
37289 rs6000_get_function_versions_dispatcher (void *decl)
37291 tree fn = (tree) decl;
37292 struct cgraph_node *node = NULL;
37293 struct cgraph_node *default_node = NULL;
37294 struct cgraph_function_version_info *node_v = NULL;
37295 struct cgraph_function_version_info *first_v = NULL;
37297 tree dispatch_decl = NULL;
37299 struct cgraph_function_version_info *default_version_info = NULL;
37300 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37302 if (TARGET_DEBUG_TARGET)
37303 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37304 get_decl_name (fn));
37306 node = cgraph_node::get (fn);
37307 gcc_assert (node != NULL);
37309 node_v = node->function_version ();
37310 gcc_assert (node_v != NULL);
/* Reuse an already-created dispatcher.  */
37312 if (node_v->dispatcher_resolver != NULL)
37313 return node_v->dispatcher_resolver;
37315 /* Find the default version and make it the first node. */
37317 /* Go to the beginning of the chain. */
37318 while (first_v->prev != NULL)
37319 first_v = first_v->prev;
37321 default_version_info = first_v;
37322 while (default_version_info != NULL)
37324 const tree decl2 = default_version_info->this_node->decl;
37325 if (is_function_default_version (decl2))
37327 default_version_info = default_version_info->next;
37330 /* If there is no default node, just return NULL. */
37331 if (default_version_info == NULL)
37334 /* Make default info the first node. */
/* Unlink the default's version-info record and splice it in at the head
   of the doubly-linked chain.  */
37335 if (first_v != default_version_info)
37337 default_version_info->prev->next = default_version_info->next;
37338 if (default_version_info->next)
37339 default_version_info->next->prev = default_version_info->prev;
37340 first_v->prev = default_version_info;
37341 default_version_info->next = first_v;
37342 default_version_info->prev = NULL;
37345 default_node = default_version_info->this_node;
37347 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
37348 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37349 "target_clones attribute needs GLIBC (2.23 and newer) that "
37350 "exports hardware capability bits");
37353 if (targetm.has_ifunc_p ())
37355 struct cgraph_function_version_info *it_v = NULL;
37356 struct cgraph_node *dispatcher_node = NULL;
37357 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37359 /* Right now, the dispatching is done via ifunc. */
37360 dispatch_decl = make_dispatcher_decl (default_node->decl);
37362 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37363 gcc_assert (dispatcher_node != NULL);
37364 dispatcher_node->dispatcher_function = 1;
37365 dispatcher_version_info
37366 = dispatcher_node->insert_new_function_version ();
37367 dispatcher_version_info->next = default_version_info;
37368 dispatcher_node->definition = 1;
37370 /* Set the dispatcher for all the versions. */
37371 it_v = default_version_info;
37372 while (it_v != NULL)
37374 it_v->dispatcher_resolver = dispatch_decl;
/* Without ifunc support, multiversioning cannot be implemented.  */
37380 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37381 "multiversioning needs ifunc which is not supported "
37386 return dispatch_decl;
37389 /* Make the resolver function decl to dispatch the versions of a multi-
37390 versioned function, DEFAULT_DECL. Create an empty basic block in the
37391 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
/* NOTE(review): return type line, braces, and the final return statement
   were dropped by the extraction.  */
37395 make_resolver_func (const tree default_decl,
37396 const tree dispatch_decl,
37397 basic_block *empty_bb)
37399 /* Make the resolver function static. The resolver function returns
/* Build the resolver decl: returns void*, named "<fn>.resolver".  */
37401 tree decl_name = clone_function_name (default_decl, "resolver");
37402 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
37403 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37404 tree decl = build_fn_decl (resolver_name, type);
37405 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37407 DECL_NAME (decl) = decl_name;
37408 TREE_USED (decl) = 1;
37409 DECL_ARTIFICIAL (decl) = 1;
37410 DECL_IGNORED_P (decl) = 0;
37411 TREE_PUBLIC (decl) = 0;
37412 DECL_UNINLINABLE (decl) = 1;
37414 /* Resolver is not external, body is generated. */
37415 DECL_EXTERNAL (decl) = 0;
37416 DECL_EXTERNAL (dispatch_decl) = 0;
37418 DECL_CONTEXT (decl) = NULL_TREE;
37419 DECL_INITIAL (decl) = make_node (BLOCK);
37420 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37422 /* Build result decl and add to function_decl. */
37423 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37424 DECL_ARTIFICIAL (t) = 1;
37425 DECL_IGNORED_P (t) = 1;
37426 DECL_RESULT (decl) = t;
/* Lower the new function and create its initial empty basic block,
   returned to the caller through EMPTY_BB.  */
37428 gimplify_function_tree (decl);
37429 push_cfun (DECL_STRUCT_FUNCTION (decl));
37430 *empty_bb = init_lowered_empty_function (decl, false,
37431 profile_count::uninitialized ());
37433 cgraph_node::add_new_function (decl, true);
37434 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37438 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37439 DECL_ATTRIBUTES (dispatch_decl)
37440 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37442 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37447 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37448 return a pointer to VERSION_DECL if we are running on a machine that
37449 supports the index CLONE_ISA hardware architecture bits. This function will
37450 be called during version dispatch to decide which function version to
37451 execute. It returns the basic block at the end, to which more conditions
/* NOTE(review): the return type line, braces and some trailing statements
   (pop_cfun/return) are missing from this extraction.  */
37455 add_condition_to_bb (tree function_decl, tree version_decl,
37456 int clone_isa, basic_block new_bb)
37458 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37460 gcc_assert (new_bb != NULL);
37461 gimple_seq gseq = bb_seq (new_bb);
/* Build "result = (void *) &version_decl; return result;".  */
37464 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37465 build_fold_addr_expr (version_decl));
37466 tree result_var = create_tmp_var (ptr_type_node);
37467 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37468 gimple *return_stmt = gimple_build_return (result_var);
/* The default clone needs no runtime test: return it unconditionally.  */
37470 if (clone_isa == CLONE_DEFAULT)
37472 gimple_seq_add_stmt (&gseq, convert_stmt);
37473 gimple_seq_add_stmt (&gseq, return_stmt);
37474 set_bb_seq (new_bb, gseq);
37475 gimple_set_bb (convert_stmt, new_bb);
37476 gimple_set_bb (return_stmt, new_bb);
/* Otherwise guard the return with __builtin_cpu_supports ("<isa>").  */
37481 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37482 tree cond_var = create_tmp_var (bool_int_type_node);
37483 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37484 const char *arg_str = rs6000_clone_map[clone_isa].name;
37485 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37486 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37487 gimple_call_set_lhs (call_cond_stmt, cond_var);
37489 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37490 gimple_set_bb (call_cond_stmt, new_bb);
37491 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37493 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37494 NULL_TREE, NULL_TREE);
37495 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37496 gimple_set_bb (if_else_stmt, new_bb);
37497 gimple_seq_add_stmt (&gseq, if_else_stmt);
37499 gimple_seq_add_stmt (&gseq, convert_stmt);
37500 gimple_seq_add_stmt (&gseq, return_stmt);
37501 set_bb_seq (new_bb, gseq);
/* Split the block after the condition (bb1 -> bb2 on true) and after
   the return (bb2 -> bb3); the false edge of bb1 skips to bb3, where
   the next version's test will be appended.  */
37503 basic_block bb1 = new_bb;
37504 edge e12 = split_block (bb1, if_else_stmt);
37505 basic_block bb2 = e12->dest;
37506 e12->flags &= ~EDGE_FALLTHRU;
37507 e12->flags |= EDGE_TRUE_VALUE;
37509 edge e23 = split_block (bb2, return_stmt);
37510 gimple_set_bb (convert_stmt, bb2);
37511 gimple_set_bb (return_stmt, bb2);
37513 basic_block bb3 = e23->dest;
37514 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37517 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37523 /* This function generates the dispatch function for multi-versioned functions.
37524 DISPATCH_DECL is the function which will contain the dispatch logic.
37525 FNDECLS are the function choices for dispatch, and is a tree chain.
37526 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37527 code is generated. */
/* NOTE(review): return type, the FNDECLS_P parameter line, braces, loop
   guard for empty clone slots and the final return were dropped by the
   extraction.  */
37530 dispatch_function_versions (tree dispatch_decl,
37532 basic_block *empty_bb)
37536 vec<tree> *fndecls;
37537 tree clones[CLONE_MAX];
37539 if (TARGET_DEBUG_TARGET)
37540 fputs ("dispatch_function_versions, top\n", stderr);
37542 gcc_assert (dispatch_decl != NULL
37543 && fndecls_p != NULL
37544 && empty_bb != NULL);
37546 /* fndecls_p is actually a vector. */
37547 fndecls = static_cast<vec<tree> *> (fndecls_p);
37549 /* At least one more version other than the default. */
37550 gcc_assert (fndecls->length () >= 2);
37552 /* The first version in the vector is the default decl. */
37553 memset ((void *) clones, '\0', sizeof (clones));
37554 clones[CLONE_DEFAULT] = (*fndecls)[0];
37556 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37557 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37558 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
37559 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37560 to insert the code here to do the call. */
/* Bucket each non-default version by its clone priority; the first decl
   seen for a priority wins.  */
37562 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37564 int priority = rs6000_clone_priority (ele);
37565 if (!clones[priority])
37566 clones[priority] = ele;
/* Emit the runtime tests from highest priority down to the default.  */
37569 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37572 if (TARGET_DEBUG_TARGET)
37573 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37574 ix, get_decl_name (clones[ix]));
37576 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37583 /* Generate the dispatching code body to dispatch multi-versioned function
37584 DECL. The target hook is called to process the "target" attributes and
37585 provide the code to dispatch the right function at run-time. NODE points
37586 to the dispatcher decl whose body will be created. */
/* NOTE(review): return type, the declaration of `resolver`, braces,
   pop_cfun and the final return were dropped by the extraction.  */
37589 rs6000_generate_version_dispatcher_body (void *node_p)
37592 basic_block empty_bb;
37593 struct cgraph_node *node = (cgraph_node *) node_p;
37594 struct cgraph_function_version_info *ninfo = node->function_version ();
/* Already generated once — reuse the cached resolver.  */
37596 if (ninfo->dispatcher_resolver)
37597 return ninfo->dispatcher_resolver;
37599 /* node is going to be an alias, so remove the finalized bit. */
37600 node->definition = false;
37602 /* The first version in the chain corresponds to the default version. */
37603 ninfo->dispatcher_resolver = resolver
37604 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37606 if (TARGET_DEBUG_TARGET)
37607 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
37608 get_decl_name (resolver));
37610 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37611 auto_vec<tree, 2> fn_ver_vec;
/* Collect every version decl; reject virtual methods, which cannot be
   multiversioned.  */
37613 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37615 vinfo = vinfo->next)
37617 struct cgraph_node *version = vinfo->this_node;
37618 /* Check for virtual functions here again, as by this time it should
37619 have been determined if this function needs a vtable index or
37620 not. This happens for methods in derived classes that override
37621 virtual methods in base classes but are not explicitly marked as
37623 if (DECL_VINDEX (version->decl))
37624 sorry ("Virtual function multiversioning not supported");
37626 fn_ver_vec.safe_push (version->decl);
37629 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37630 cgraph_edge::rebuild_edges ();
37636 /* Hook to determine if one function can safely inline another. */
/* Returns whether CALLEE may be inlined into CALLER based on their
   per-function target options.  NOTE(review): return type, the `ret`
   declaration, braces, the callee-has-no-attributes early exit and the
   final return were dropped by the extraction.  */
37639 rs6000_can_inline_p (tree caller, tree callee)
37642 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37643 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37645 /* If callee has no option attributes, then it is ok to inline. */
37649 /* If caller has no option attributes, but callee does then it is not ok to
37651 else if (!caller_tree)
37656 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37657 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37659 /* Callee's options should a subset of the caller's, i.e. a vsx function
37660 can inline an altivec function but a non-vsx function can't inline a
/* Subset test: ANDing the caller's flags with the callee's must leave
   the callee's flags unchanged.  */
37662 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37663 == callee_opts->x_rs6000_isa_flags)
37667 if (TARGET_DEBUG_TARGET)
37668 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
37669 get_decl_name (caller), get_decl_name (callee),
37670 (ret ? "can" : "cannot"));
37675 /* Allocate a stack temp and fixup the address so it meets the particular
37676 memory requirements (either offetable or REG+REG addressing). */
/* NOTE(review): return type, the reg_reg_p parameter line, braces and the
   final `return stack;` were dropped by the extraction.  */
37679 rs6000_allocate_stack_temp (machine_mode mode,
37680 bool offsettable_p,
37683 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37684 rtx addr = XEXP (stack, 0);
/* After reload we must honor strict addressing rules.  */
37685 int strict_p = reload_completed;
/* Force the address into a register if it doesn't already satisfy the
   requested addressing form (offsettable or indexed).  */
37687 if (!legitimate_indirect_address_p (addr, strict_p))
37690 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37691 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37693 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37694 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37700 /* Given a memory reference, if it is not a reg or reg+reg addressing,
37701 convert to such a form to deal with memory reference instructions
37702 like STFIWX and LDBRX that only take reg+reg addressing. */
/* NOTE(review): return type, braces and the final `return x;` were dropped
   by the extraction.  */
37705 rs6000_force_indexed_or_indirect_mem (rtx x)
37707 machine_mode mode = GET_MODE (x);
37709 gcc_assert (MEM_P (x));
37710 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
37712 rtx addr = XEXP (x, 0);
/* Pre-increment/decrement: materialize the update as an explicit add,
   then address through the base register.  */
37713 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37715 rtx reg = XEXP (addr, 0);
37716 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37717 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37718 gcc_assert (REG_P (reg));
37719 emit_insn (gen_add3_insn (reg, reg, size_rtx));
/* PRE_MODIFY: emit the side-effect add explicitly as well.  */
37722 else if (GET_CODE (addr) == PRE_MODIFY)
37724 rtx reg = XEXP (addr, 0);
37725 rtx expr = XEXP (addr, 1);
37726 gcc_assert (REG_P (reg));
37727 gcc_assert (GET_CODE (expr) == PLUS);
37728 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
/* Finally force the (possibly rewritten) address into a register.  */
37732 x = replace_equiv_address (x, force_reg (Pmode, addr));
37738 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37740 On the RS/6000, all integer constants are acceptable, most won't be valid
37741 for particular insns, though. Only easy FP constants are acceptable. */
/* NOTE(review): return type, braces, the `return false;` after the TLS
   check and the final default return were dropped by the extraction.  */
37744 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
/* TLS-referencing constants are never legitimate on ELF targets.  */
37746 if (TARGET_ELF && tls_referenced_p (x))
37749 if (CONST_DOUBLE_P (x))
37750 return easy_fp_constant (x, mode);
37752 if (GET_CODE (x) == CONST_VECTOR)
37753 return easy_vector_constant (x, mode);
37759 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
/* Walks backwards over the insn list looking for a SET whose destination
   is the static-chain register.  NOTE(review): return type, braces and
   the return statements were dropped by the extraction.  */
37762 chain_already_loaded (rtx_insn *last)
37764 for (; last != NULL; last = PREV_INSN (last))
37766 if (NONJUMP_INSN_P (last))
37768 rtx patt = PATTERN (last);
37770 if (GET_CODE (patt) == SET)
37772 rtx lhs = XEXP (patt, 0);
37774 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37782 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
/* NOTE(review): this extraction dropped many lines — the return type,
   braces, several declarations (func_addr, insn, call[], n_call) and some
   conditions (e.g. the HAVE_AS_PLTSEQ guards).  Comments describe only the
   visible statements.  */
37785 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37787 rtx func = func_desc;
37788 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37789 rtx toc_load = NULL_RTX;
37790 rtx toc_restore = NULL_RTX;
37792 rtx abi_reg = NULL_RTX;
37798 tlsarg = global_tlsarg;
37800 /* Handle longcall attributes. */
37801 if ((INTVAL (cookie) & CALL_LONG) != 0
37802 && GET_CODE (func_desc) == SYMBOL_REF)
37803 func = rs6000_longcall_ref (func_desc, tlsarg);
37805 /* Handle indirect calls. */
37806 if (!SYMBOL_REF_P (func)
37807 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
37809 /* Save the TOC into its reserved slot before the call,
37810 and prepare to restore it after the call. */
37811 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37812 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37813 gen_rtvec (1, stack_toc_offset),
37815 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37817 /* Can we optimize saving the TOC in the prologue or
37818 do we need to do it at every call? */
37819 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37820 cfun->machine->save_toc_in_prologue = true;
37823 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37824 rtx stack_toc_mem = gen_frame_mem (Pmode,
37825 gen_rtx_PLUS (Pmode, stack_ptr,
37826 stack_toc_offset));
/* Volatile so the TOC save is not optimized away or reordered.  */
37827 MEM_VOLATILE_P (stack_toc_mem) = 1;
37829 && DEFAULT_ABI == ABI_ELFv2
37830 && GET_CODE (func_desc) == SYMBOL_REF
/* PLT-sequence form: mark the TOC save for linker editing.  */
37832 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
37833 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37834 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
37837 emit_move_insn (stack_toc_mem, toc_reg);
37840 if (DEFAULT_ABI == ABI_ELFv2)
37842 /* A function pointer in the ELFv2 ABI is just a plain address, but
37843 the ABI requires it to be loaded into r12 before the call. */
37844 func_addr = gen_rtx_REG (Pmode, 12);
37845 if (!rtx_equal_p (func_addr, func))
37846 emit_move_insn (func_addr, func);
37847 abi_reg = func_addr;
37848 /* Indirect calls via CTR are strongly preferred over indirect
37849 calls via LR, so move the address there. Needed to mark
37850 this insn for linker plt sequence editing too. */
37851 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
37853 && GET_CODE (func_desc) == SYMBOL_REF)
37855 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
37856 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37857 emit_insn (gen_rtx_SET (func_addr, mark_func));
37858 v = gen_rtvec (2, func_addr, func_desc);
37859 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
37862 emit_move_insn (func_addr, abi_reg);
37866 /* A function pointer under AIX is a pointer to a data area whose
37867 first word contains the actual address of the function, whose
37868 second word contains a pointer to its TOC, and whose third word
37869 contains a value to place in the static chain register (r11).
37870 Note that if we load the static chain, our "trampoline" need
37871 not have any executable code. */
37873 /* Load up address of the actual function. */
37874 func = force_reg (Pmode, func);
37875 func_addr = gen_reg_rtx (Pmode);
37876 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
37878 /* Indirect calls via CTR are strongly preferred over indirect
37879 calls via LR, so move the address there. */
37880 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
37881 emit_move_insn (ctr_reg, func_addr);
37882 func_addr = ctr_reg;
37884 /* Prepare to load the TOC of the called function. Note that the
37885 TOC load must happen immediately before the actual call so
37886 that unwinding the TOC registers works correctly. See the
37887 comment in frob_update_context. */
37888 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37889 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37890 gen_rtx_PLUS (Pmode, func,
37892 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37894 /* If we have a static chain, load it up. But, if the call was
37895 originally direct, the 3rd word has not been written since no
37896 trampoline has been built, so we ought not to load it, lest we
37897 override a static chain value. */
37898 if (!(GET_CODE (func_desc) == SYMBOL_REF
37899 && SYMBOL_REF_FUNCTION_P (func_desc))
37900 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37901 && !chain_already_loaded (get_current_sequence ()->next->last))
37903 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37904 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37905 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37906 gen_rtx_PLUS (Pmode, func,
37908 emit_move_insn (sc_reg, func_sc_mem);
37915 /* Direct calls use the TOC: for local calls, the callee will
37916 assume the TOC register is set; for non-local calls, the
37917 PLT stub needs the TOC register. */
37922 /* Create the call. */
/* Build a PARALLEL of the call (possibly a SET when VALUE is present),
   the optional TOC load/restore, and the LR clobber.  */
37923 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
37924 if (value != NULL_RTX)
37925 call[0] = gen_rtx_SET (value, call[0]);
37929 call[n_call++] = toc_load;
37931 call[n_call++] = toc_restore;
37933 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
37935 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37936 insn = emit_call_insn (insn);
37938 /* Mention all registers defined by the ABI to hold information
37939 as uses in CALL_INSN_FUNCTION_USAGE. */
37941 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37944 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI.
   VALUE is the rtx receiving the call's result (NULL_RTX for a void
   call), FUNC_DESC addresses the callee, TLSARG is an optional
   __tls_get_addr argument, and COOKIE carries call flags (asserted to
   be zero here: AIX sibcalls take no special flags).
   NOTE(review): the extraction has dropped interleaved lines of this
   function (storage-class/brace/declaration lines); verify against
   upstream rs6000.c before relying on the exact text below.  */
37947 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37952 gcc_assert (INTVAL (cookie) == 0);
37955 tlsarg = global_tlsarg;
37957 /* Create the call: a PARALLEL of the (possibly value-setting) CALL
   and a simple_return, as required for a sibcall pattern.  */
37958 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
37959 if (value != NULL_RTX)
37960 call[0] = gen_rtx_SET (value, call[0]);
37962 call[1] = simple_return_rtx;
37964 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37965 insn = emit_call_insn (insn);
37967 /* Note use of the TOC register, so the callee's TOC expectations
   are visible to dataflow.  */
37968 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37971 /* Expand code to perform a call under the SYSV4 ABI.
   VALUE receives the result (NULL_RTX for void), FUNC_DESC is the
   callee address, TLSARG an optional __tls_get_addr argument, and
   COOKIE a CONST_INT of CALL_* flags (CALL_LONG triggers the
   -mlongcall path below).
   NOTE(review): interleaved lines (declarations/braces) were lost in
   extraction; confirm against upstream before editing.  */
37974 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
37976 rtx func = func_desc;
37980 rtx abi_reg = NULL_RTX;
37984 tlsarg = global_tlsarg;
37986 /* Handle longcall attributes. */
37987 if ((INTVAL (cookie) & CALL_LONG) != 0
37988 && GET_CODE (func_desc) == SYMBOL_REF)
37990 func = rs6000_longcall_ref (func_desc, tlsarg);
37991 /* If the longcall was implemented using PLT16 relocs, then r11
37992 needs to be valid at the call for lazy linking. */
37993 if (HAVE_AS_PLTSEQ)
37997 /* Handle indirect calls. */
37998 if (GET_CODE (func) != SYMBOL_REF)
38000 func = force_reg (Pmode, func);
38002 /* Indirect calls via CTR are strongly preferred over indirect
38003 calls via LR, so move the address there. Needed to mark
38004 this insn for linker plt sequence editing too. */
38005 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
38007 && GET_CODE (func_desc) == SYMBOL_REF)
38009 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
38010 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
38011 emit_insn (gen_rtx_SET (func_addr, mark_func));
38012 v = gen_rtvec (2, func_addr, func_desc);
38013 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
38016 emit_move_insn (func_addr, func);
38021 /* Create the call.  The cookie USE keeps the flags visible in the
   insn pattern so the correct call alternative is selected.  */
38022 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
38023 if (value != NULL_RTX)
38024 call[0] = gen_rtx_SET (value, call[0]);
38026 call[1] = gen_rtx_USE (VOIDmode, cookie);
38028 if (TARGET_SECURE_PLT
38030 && GET_CODE (func_addr) == SYMBOL_REF
38031 && !SYMBOL_REF_LOCAL_P (func_addr))
38032 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
38034 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
38036 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
38037 insn = emit_call_insn (insn);
38039 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38042 /* Expand code to perform a sibling call under the SysV4 ABI.
   Mirrors rs6000_call_sysv, but the PARALLEL ends in simple_return
   instead of clobbering LR, and indirect targets must go via CTR.
   NOTE(review): interleaved lines were lost in extraction; confirm
   against upstream before editing.  */
38045 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
38047 rtx func = func_desc;
38051 rtx abi_reg = NULL_RTX;
38054 tlsarg = global_tlsarg;
38056 /* Handle longcall attributes. */
38057 if ((INTVAL (cookie) & CALL_LONG) != 0
38058 && GET_CODE (func_desc) == SYMBOL_REF)
38060 func = rs6000_longcall_ref (func_desc, tlsarg);
38061 /* If the longcall was implemented using PLT16 relocs, then r11
38062 needs to be valid at the call for lazy linking. */
38063 if (HAVE_AS_PLTSEQ)
38067 /* Handle indirect calls. */
38068 if (GET_CODE (func) != SYMBOL_REF)
38070 func = force_reg (Pmode, func);
38072 /* Indirect sibcalls must go via CTR. Needed to mark
38073 this insn for linker plt sequence editing too. */
38074 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
38076 && GET_CODE (func_desc) == SYMBOL_REF)
38078 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
38079 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
38080 emit_insn (gen_rtx_SET (func_addr, mark_func));
38081 v = gen_rtvec (2, func_addr, func_desc);
38082 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
38085 emit_move_insn (func_addr, func);
38090 /* Create the call: CALL (+ optional SET), cookie USE, and the
   simple_return that makes this a sibcall pattern.  */
38091 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
38092 if (value != NULL_RTX)
38093 call[0] = gen_rtx_SET (value, call[0]);
38095 call[1] = gen_rtx_USE (VOIDmode, cookie);
38096 call[2] = simple_return_rtx;
38098 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
38099 insn = emit_call_insn (insn);
38101 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38106 /* Expand code to perform a call under the Darwin ABI.
38107 Modulo handling of mlongcall, this is much the same as sysv.
38108 if/when the longcall optimisation is removed, we could drop this
38109 code and use the sysv case (taking care to avoid the tls stuff).
38111 We can use this for sibcalls too, if needed.
   SIBCALL selects between a simple_return (sibcall) and an LR clobber
   (normal call) as the third element of the PARALLEL.
   NOTE(review): interleaved lines were lost in extraction; confirm
   against upstream before editing.  */
38114 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
38115 rtx cookie, bool sibcall)
38117 rtx func = func_desc;
38121 int cookie_val = INTVAL (cookie);
38122 bool make_island = false;
38124 /* Handle longcall attributes, there are two cases for Darwin:
38125 1) Newer linkers are capable of synthesising any branch islands needed.
38126 2) We need a helper branch island synthesised by the compiler.
38127 The second case has mostly been retired and we don't use it for m64.
38128 In fact, it's is an optimisation, we could just indirect as sysv does..
38129 ... however, backwards compatibility for now.
38130 If we're going to use this, then we need to keep the CALL_LONG bit set,
38131 so that we can pick up the special insn form later. */
38132 if ((cookie_val & CALL_LONG) != 0
38133 && GET_CODE (func_desc) == SYMBOL_REF)
38135 if (darwin_emit_branch_islands && TARGET_32BIT)
38136 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
38139 /* The linker is capable of doing this, but the user explicitly
38140 asked for -mlongcall, so we'll do the 'normal' version. */
38141 func = rs6000_longcall_ref (func_desc, NULL_RTX);
38142 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
38146 /* Handle indirect calls. */
38147 if (GET_CODE (func) != SYMBOL_REF)
38149 func = force_reg (Pmode, func);
38151 /* Indirect calls via CTR are strongly preferred over indirect
38152 calls via LR, and are required for indirect sibcalls, so move
38153 the address there. */
38154 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
38155 emit_move_insn (func_addr, func);
38160 /* Create the call. */
38161 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
38162 if (value != NULL_RTX)
38163 call[0] = gen_rtx_SET (value, call[0]);
38165 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
38168 call[2] = simple_return_rtx;
38170 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
38172 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
38173 insn = emit_call_insn (insn);
38174 /* Now we have the debug info in the insn, we can set up the branch island
38175 if we're using one. */
38178 tree funname = get_identifier (XSTR (func_desc, 0));
38180 if (no_previous_def (funname))
38182 rtx label_rtx = gen_label_rtx ();
38183 char *label_buf, temp_buf[256];
38184 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
38185 CODE_LABEL_NUMBER (label_rtx));
38186 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
38187 tree labelname = get_identifier (label_buf);
38188 add_compiler_branch_island (labelname, funname,
38189 insn_line ((const rtx_insn*)insn));
/* Expand a normal (non-sibling) call for Darwin: thin wrapper that
   forwards to rs6000_call_darwin_1 with sibcall == false.  */
38196 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
38197 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
38200 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
/* Expand a sibling call for Darwin: thin wrapper that forwards to
   rs6000_call_darwin_1 with sibcall == true.  */
38208 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
38209 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
38212 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
38219 /* Return whether we need to always update the saved TOC pointer when we update
38220 the stack pointer.  The chained null checks guard against being
   called before cfun / cfun->machine are set up.  */
38223 rs6000_save_toc_in_prologue_p (void)
38225 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
/* USE_HIDDEN_LINKONCE: 1 when the assembler supports hidden
   visibility (so the thunk can be a comdat/linkonce symbol shared
   across objects), 0 otherwise (use a per-file internal label).  */
38228 #ifdef HAVE_GAS_HIDDEN
38229 # define USE_HIDDEN_LINKONCE 1
38231 # define USE_HIDDEN_LINKONCE 0
38234 /* Fills in the label name that should be used for a 476 link stack thunk.
   NAME must have room for 32 bytes; only meaningful with
   -mlink-stack (asserted).  */
38237 get_ppc476_thunk_name (char name[32])
38239 gcc_assert (TARGET_LINK_STACK);
38241 if (USE_HIDDEN_LINKONCE)
38242 sprintf (name, "__ppc476.get_thunk")38244 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38247 /* This function emits the simple thunk routine that is used to preserve
38248 the link stack on the 476 cpu.  Runs at end of compilation
   (TARGET_ASM_CODE_END); emits nothing unless -mlink-stack is on.
   NOTE(review): some lines (early-return, local declarations) were
   dropped in extraction; verify against upstream.  */
38250 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38252 rs6000_code_end (void)
38257 if (!TARGET_LINK_STACK)
38260 get_ppc476_thunk_name (name);
/* Build a minimal void(void) FUNCTION_DECL so the thunk gets normal
   function bookkeeping (sections, visibility, unwind info).  */
38262 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38263 build_function_type_list (void_type_node, NULL_TREE));
38264 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38265 NULL_TREE, void_type_node);
38266 TREE_PUBLIC (decl) = 1;
38267 TREE_STATIC (decl) = 1;
38270 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
/* Comdat + hidden visibility: one shared copy across objects.  */
38272 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38273 targetm.asm_out.unique_section (decl, 0);
38274 switch_to_section (get_named_section (decl, NULL, 0));
38275 DECL_WEAK (decl) = 1;
38276 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38277 targetm.asm_out.globalize_label (asm_out_file, name);
38278 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38279 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38284 switch_to_section (text_section);
38285 ASM_OUTPUT_LABEL (asm_out_file, name);
38288 DECL_INITIAL (decl) = make_node (BLOCK);
38289 current_function_decl = decl;
38290 allocate_struct_function (decl, false);
38291 init_function_start (decl);
38292 first_function_block_is_cold = false;
38293 /* Make sure unwind info is emitted for the thunk if needed. */
38294 final_start_function (emit_barrier (), asm_out_file, 1);
/* The thunk body is a single blr.  */
38296 fputs ("\tblr\n", asm_out_file);
38298 final_end_function ();
38299 init_insn_lengths ();
38300 free_after_compilation (cfun);
38302 current_function_decl = NULL;
38305 /* Add r30 to hard reg set if the prologue sets it up and it is not
38306 pic_offset_table_rtx.  Implements TARGET_SET_UP_BY_PROLOGUE: tells
   the shrink-wrapping pass which registers the prologue defines.  */
38309 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38311 if (!TARGET_SINGLE_PIC_BASE
38313 && TARGET_MINIMAL_TOC
38314 && !constant_pool_empty_p ())
38315 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
/* r12 holds the incoming arg pointer for -fsplit-stack functions.  */
38316 if (cfun->machine->split_stack_argp_used)
38317 add_to_hard_reg_set (&set->set, Pmode, 12);
38319 /* Make sure the hard reg set doesn't include r2, which was possibly added
38320 via PIC_OFFSET_TABLE_REGNUM. */
38322 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
38326 /* Helper function for rs6000_split_logical to emit a logical instruction after
38327 spliting the operation to single GPR registers.
38329 DEST is the destination register.
38330 OP1 and OP2 are the input source registers.
38331 CODE is the base operation (AND, IOR, XOR, NOT).
38332 MODE is the machine mode.
38333 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38334 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38335 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
   NOTE(review): some lines were dropped in extraction (mode parameter
   declaration, several braces/returns); verify against upstream.  */
38338 rs6000_split_logical_inner (rtx dest,
38341 enum rtx_code code,
38343 bool complement_final_p,
38344 bool complement_op1_p,
38345 bool complement_op2_p)
38349 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38350 if (op2 && CONST_INT_P (op2)
38351 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38352 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38354 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38355 HOST_WIDE_INT value = INTVAL (op2) & mask;
38357 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38362 emit_insn (gen_rtx_SET (dest, const0_rtx));
38366 else if (value == mask)
38368 if (!rtx_equal_p (dest, op1))
38369 emit_insn (gen_rtx_SET (dest, op1));
38374 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38375 into separate ORI/ORIS or XORI/XORIS instrucitons. */
38376 else if (code == IOR || code == XOR)
38380 if (!rtx_equal_p (dest, op1))
38381 emit_insn (gen_rtx_SET (dest, op1));
/* SImode AND needs the expander so the required CC clobber is added.  */
38387 if (code == AND && mode == SImode
38388 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38390 emit_insn (gen_andsi3 (dest, op1, op2));
38394 if (complement_op1_p)
38395 op1 = gen_rtx_NOT (mode, op1);
38397 if (complement_op2_p)
38398 op2 = gen_rtx_NOT (mode, op2);
38400 /* For canonical RTL, if only one arm is inverted it is the first. */
38401 if (!complement_op1_p && complement_op2_p)
38402 std::swap (op1, op2);
38404 bool_rtx = ((code == NOT)
38405 ? gen_rtx_NOT (mode, op1)
38406 : gen_rtx_fmt_ee (code, mode, op1, op2));
38408 if (complement_final_p)
38409 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38411 emit_insn (gen_rtx_SET (dest, bool_rtx));
38414 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38415 operations are split immediately during RTL generation to allow for more
38416 optimizations of the AND/IOR/XOR.
38418 OPERANDS is an array containing the destination and two input operands.
38419 CODE is the base operation (AND, IOR, XOR, NOT).
38420 MODE is the machine mode.
38421 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38422 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38423 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38424 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38425 formation of the AND instructions. */
38428 rs6000_split_logical_di (rtx operands[3],
38429 enum rtx_code code,
38430 bool complement_final_p,
38431 bool complement_op1_p,
38432 bool complement_op2_p)
38434 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38435 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38436 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38437 enum hi_lo { hi = 0, lo = 1 };
38438 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
/* Split the 64-bit destination and first source into SImode halves.  */
38441 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38442 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38443 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38444 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38447 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38450 if (!CONST_INT_P (operands[2]))
38452 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38453 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
/* Constant second operand: split the value into sign-extended
   32-bit halves ourselves.  */
38457 HOST_WIDE_INT value = INTVAL (operands[2]);
38458 HOST_WIDE_INT value_hi_lo[2];
38460 gcc_assert (!complement_final_p);
38461 gcc_assert (!complement_op1_p);
38462 gcc_assert (!complement_op2_p);
38464 value_hi_lo[hi] = value >> 32;
38465 value_hi_lo[lo] = value & lower_32bits;
38467 for (i = 0; i < 2; i++)
38469 HOST_WIDE_INT sub_value = value_hi_lo[i];
38471 if (sub_value & sign_bit)
38472 sub_value |= upper_32bits;
38474 op2_hi_lo[i] = GEN_INT (sub_value);
38476 /* If this is an AND instruction, check to see if we need to load
38477 the value in a register. */
38478 if (code == AND && sub_value != -1 && sub_value != 0
38479 && !and_operand (op2_hi_lo[i], SImode))
38480 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
/* Emit the operation on each 32-bit half.  */
38485 for (i = 0; i < 2; i++)
38487 /* Split large IOR/XOR operations. */
38488 if ((code == IOR || code == XOR)
38489 && CONST_INT_P (op2_hi_lo[i])
38490 && !complement_final_p
38491 && !complement_op1_p
38492 && !complement_op2_p
38493 && !logical_const_operand (op2_hi_lo[i], SImode))
38495 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38496 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38497 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38498 rtx tmp = gen_reg_rtx (SImode);
38500 /* Make sure the constant is sign extended. */
38501 if ((hi_16bits & sign_bit) != 0)
38502 hi_16bits |= upper_32bits;
38504 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38505 code, SImode, false, false, false);
38507 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38508 code, SImode, false, false, false);
38511 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38512 code, SImode, complement_final_p,
38513 complement_op1_p, complement_op2_p);
38519 /* Split the insns that make up boolean operations operating on multiple GPR
38520 registers. The boolean MD patterns ensure that the inputs either are
38521 exactly the same as the output registers, or there is no overlap.
38523 OPERANDS is an array containing the destination and two input operands.
38524 CODE is the base operation (AND, IOR, XOR, NOT).
38525 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38526 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38527 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38530 rs6000_split_logical (rtx operands[3],
38531 enum rtx_code code,
38532 bool complement_final_p,
38533 bool complement_op1_p,
38534 bool complement_op2_p)
38536 machine_mode mode = GET_MODE (operands[0]);
38537 machine_mode sub_mode;
38539 int sub_size, regno0, regno1, nregs, i;
38541 /* If this is DImode, use the specialized version that can run before
38542 register allocation. */
38543 if (mode == DImode && !TARGET_POWERPC64)
38545 rs6000_split_logical_di (operands, code, complement_final_p,
38546 complement_op1_p, complement_op2_p);
/* From here on we require hard registers (post-reload path).  */
38552 op2 = (code == NOT) ? NULL_RTX : operands[2];
38553 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38554 sub_size = GET_MODE_SIZE (sub_mode);
38555 regno0 = REGNO (op0);
38556 regno1 = REGNO (op1);
38558 gcc_assert (reload_completed);
38559 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38560 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38562 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38563 gcc_assert (nregs > 1);
38565 if (op2 && REG_P (op2))
38566 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
/* Emit one sub-mode operation per constituent GPR.  */
38568 for (i = 0; i < nregs; i++)
38570 int offset = i * sub_size;
38571 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38572 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38573 rtx sub_op2 = ((code == NOT)
38575 : simplify_subreg (sub_mode, op2, mode, offset));
38577 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38578 complement_final_p, complement_op1_p,
38586 /* Return true if the peephole2 can combine a load involving a combination of
38587 an addis instruction and a load with an offset that can be fused together on
   a power8 (predicate used by the fusion peephole2 patterns; returns
   false when any structural or liveness requirement fails).  */
38591 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38592 rtx addis_value, /* addis value. */
38593 rtx target, /* target register that is loaded. */
38594 rtx mem) /* bottom part of the memory addr. */
38599 /* Validate arguments. */
38600 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38603 if (!base_reg_operand (target, GET_MODE (target)))
38606 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38609 /* Allow sign/zero extension.  Sign extension is only fusible when
   -mpower8-fusion-sign is in effect. */
38610 if (GET_CODE (mem) == ZERO_EXTEND
38611 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38612 mem = XEXP (mem, 0);
38617 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38620 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38621 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38624 /* Validate that the register used to load the high value is either the
38625 register being loaded, or we can safely replace its use.
38627 This function is only called from the peephole2 pass and we assume that
38628 there are 2 instructions in the peephole (addis and load), so we want to
38629 check if the target register was not used in the memory address and the
38630 register to hold the addis result is dead after the peephole. */
38631 if (REGNO (addis_reg) != REGNO (target))
38633 if (reg_mentioned_p (target, mem))
38636 if (!peep2_reg_dead_p (2, addis_reg))
38639 /* If the target register being loaded is the stack pointer, we must
38640 avoid loading any other value into it, even temporarily. */
38641 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38645 base_reg = XEXP (addr, 0);
38646 return REGNO (addis_reg) == REGNO (base_reg);
38649 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38650 sequence. We adjust the addis register to use the target register. If the
38651 load sign extends, we adjust the code to do the zero extending load, and an
38652 explicit sign extension later since the fusion only covers zero extending
   loads.
38656 operands[0] register set with addis (to be replaced with target)
38657 operands[1] value set via addis
38658 operands[2] target register being loaded
38659 operands[3] D-form memory reference using operands[0]. */
38662 expand_fusion_gpr_load (rtx *operands)
38664 rtx addis_value = operands[1];
38665 rtx target = operands[2];
38666 rtx orig_mem = operands[3];
38667 rtx new_addr, new_mem, orig_addr, offset;
38668 enum rtx_code plus_or_lo_sum;
38669 machine_mode target_mode = GET_MODE (target);
38670 machine_mode extend_mode = target_mode;
38671 machine_mode ptr_mode = Pmode;
38672 enum rtx_code extend = UNKNOWN;
/* Strip an extension wrapper; remember it so we can re-apply.  */
38674 if (GET_CODE (orig_mem) == ZERO_EXTEND
38675 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38677 extend = GET_CODE (orig_mem);
38678 orig_mem = XEXP (orig_mem, 0);
38679 target_mode = GET_MODE (orig_mem);
38682 gcc_assert (MEM_P (orig_mem));
38684 orig_addr = XEXP (orig_mem, 0);
38685 plus_or_lo_sum = GET_CODE (orig_addr);
38686 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
/* Rewrite the address to use the addis value directly as base.  */
38688 offset = XEXP (orig_addr, 1);
38689 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38690 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38692 if (extend != UNKNOWN)
38693 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
/* Wrap in UNSPEC_FUSION_GPR so later passes keep the pair adjacent.  */
38695 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38696 UNSPEC_FUSION_GPR);
38697 emit_insn (gen_rtx_SET (target, new_mem));
/* Fusion only zero-extends; emit the explicit sign extension here.  */
38699 if (extend == SIGN_EXTEND)
38701 int sub_off = ((BYTES_BIG_ENDIAN)
38702 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38705 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38707 emit_insn (gen_rtx_SET (target,
38708 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38714 /* Emit the addis instruction that will be part of a fused instruction
   sequence.  TARGET is the register set by the addis; ADDIS_VALUE is
   the high-part value, which may be a plain L-constant, a PLUS, or a
   HIGH of a TOC-relative UNSPEC.  Aborts via fatal_insn if no
   template matches.  */
38718 emit_fusion_addis (rtx target, rtx addis_value)
38721 const char *addis_str = NULL;
38723 /* Emit the addis instruction. */
38724 fuse_ops[0] = target;
38725 if (satisfies_constraint_L (addis_value))
38727 fuse_ops[1] = addis_value;
38728 addis_str = "lis %0,%v1";
38731 else if (GET_CODE (addis_value) == PLUS)
38733 rtx op0 = XEXP (addis_value, 0);
38734 rtx op1 = XEXP (addis_value, 1);
38736 if (REG_P (op0) && CONST_INT_P (op1)
38737 && satisfies_constraint_L (op1))
38741 addis_str = "addis %0,%1,%v2";
38745 else if (GET_CODE (addis_value) == HIGH)
38747 rtx value = XEXP (addis_value, 0);
/* TOC-relative high part: @toc@ha on ELF, @u on XCOFF.  */
38748 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38750 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38751 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38753 addis_str = "addis %0,%2,%1@toc@ha";
38755 else if (TARGET_XCOFF)
38756 addis_str = "addis %0,%1@u(%2)";
38759 gcc_unreachable ();
38762 else if (GET_CODE (value) == PLUS)
38764 rtx op0 = XEXP (value, 0);
38765 rtx op1 = XEXP (value, 1);
38767 if (GET_CODE (op0) == UNSPEC
38768 && XINT (op0, 1) == UNSPEC_TOCREL
38769 && CONST_INT_P (op1))
38771 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38772 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38775 addis_str = "addis %0,%2,%1+%3@toc@ha";
38777 else if (TARGET_XCOFF)
38778 addis_str = "addis %0,%1+%3@u(%2)";
38781 gcc_unreachable ();
38785 else if (satisfies_constraint_L (value))
38787 fuse_ops[1] = value;
38788 addis_str = "lis %0,%v1";
38791 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38793 fuse_ops[1] = value;
38794 addis_str = "lis %0,%1@ha";
38799 fatal_insn ("Could not generate addis value for fusion", addis_value);
38801 output_asm_insn (addis_str, fuse_ops);
38804 /* Emit a D-form load or store instruction that is the second instruction
38805 of a fusion sequence.  LOAD_REG is the data register, ADDIS_REG the
   base set by the preceding addis, OFFSET the low part of the address,
   and INSN_STR the mnemonic (e.g. "lwz").  Aborts via fatal_insn if
   OFFSET has an unsupported form.  */
38808 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
38811 char insn_template[80];
38813 fuse_ops[0] = load_reg;
38814 fuse_ops[1] = addis_reg;
/* Plain small-integer displacement.  */
38816 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38818 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38819 fuse_ops[2] = offset;
38820 output_asm_insn (insn_template, fuse_ops);
/* TOC-relative low part: @toc@l on ELF, @l on XCOFF.  */
38823 else if (GET_CODE (offset) == UNSPEC
38824 && XINT (offset, 1) == UNSPEC_TOCREL)
38827 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38829 else if (TARGET_XCOFF)
38830 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38833 gcc_unreachable ();
38835 fuse_ops[2] = XVECEXP (offset, 0, 0);
38836 output_asm_insn (insn_template, fuse_ops);
/* TOC-relative plus constant offset.  */
38839 else if (GET_CODE (offset) == PLUS
38840 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38841 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38842 && CONST_INT_P (XEXP (offset, 1)))
38844 rtx tocrel_unspec = XEXP (offset, 0);
38846 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38848 else if (TARGET_XCOFF)
38849 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38852 gcc_unreachable ();
38854 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38855 fuse_ops[3] = XEXP (offset, 1);
38856 output_asm_insn (insn_template, fuse_ops);
/* 32-bit ELF: generic constant low part via @l.  */
38859 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38861 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38863 fuse_ops[2] = offset;
38864 output_asm_insn (insn_template, fuse_ops);
38868 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38873 /* Given an address, convert it into the addis and load offset parts. Addresses
38874 created during the peephole2 process look like:
38875 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38876 (unspec [(...)] UNSPEC_TOCREL))
   On return *P_HI holds the high/addis part and *P_LO the low/offset
   part; aborts on any other address shape.  */
38879 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38883 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38885 hi = XEXP (addr, 0);
38886 lo = XEXP (addr, 1);
38889 gcc_unreachable ();
38895 /* Return a string to fuse an addis instruction with a gpr load to the same
38896 register that we loaded up the addis instruction. The address that is used
38897 is the logical address that was formed during peephole2:
38898 (lo_sum (high) (low-part))
38900 The code is complicated, so we call output_asm_insn directly, and just
   return an empty template.
   NOTE(review): the mode->mnemonic switch selecting LOAD_STR was
   dropped in extraction; verify against upstream.  */
38904 emit_fusion_gpr_load (rtx target, rtx mem)
38909 const char *load_str = NULL;
38912 if (GET_CODE (mem) == ZERO_EXTEND)
38913 mem = XEXP (mem, 0);
38915 gcc_assert (REG_P (target) && MEM_P (mem));
38917 addr = XEXP (mem, 0);
38918 fusion_split_address (addr, &addis_value, &load_offset);
38920 /* Now emit the load instruction to the same register. */
38921 mode = GET_MODE (mem);
38939 gcc_assert (TARGET_POWERPC64);
38944 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38947 /* Emit the addis instruction. */
38948 emit_fusion_addis (target, addis_value);
38950 /* Emit the D-form load instruction.  Note TARGET doubles as both the
   addis base and the loaded register — the fusion requirement. */
38951 emit_fusion_load (target, target, load_offset, load_str);
38957 #ifdef RS6000_GLIBC_ATOMIC_FENV
38958 /* Function declarations for rs6000_atomic_assign_expand_fenv.
   Lazily-built FUNCTION_DECLs for the glibc __atomic_fe* helpers;
   cached so they are created at most once per compilation.  */
38959 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
38962 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.
   Fills *HOLD, *CLEAR and *UPDATE with trees that save, clear and
   restore the FP environment around a C11 atomic compound assignment.
   Uses the glibc helpers when available, otherwise open-codes the
   sequences with mffs/mtfsf builtins.  */
38965 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
38967 if (!TARGET_HARD_FLOAT)
38969 #ifdef RS6000_GLIBC_ATOMIC_FENV
38970 if (atomic_hold_decl == NULL_TREE)
38973 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38974 get_identifier ("__atomic_feholdexcept"),
38975 build_function_type_list (void_type_node,
38976 double_ptr_type_node,
38978 TREE_PUBLIC (atomic_hold_decl) = 1;
38979 DECL_EXTERNAL (atomic_hold_decl) = 1;
38982 if (atomic_clear_decl == NULL_TREE)
38985 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38986 get_identifier ("__atomic_feclearexcept"),
38987 build_function_type_list (void_type_node,
38989 TREE_PUBLIC (atomic_clear_decl) = 1;
38990 DECL_EXTERNAL (atomic_clear_decl) = 1;
38993 tree const_double = build_qualified_type (double_type_node,
38995 tree const_double_ptr = build_pointer_type (const_double);
38996 if (atomic_update_decl == NULL_TREE)
38999 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39000 get_identifier ("__atomic_feupdateenv"),
39001 build_function_type_list (void_type_node,
39004 TREE_PUBLIC (atomic_update_decl) = 1;
39005 DECL_EXTERNAL (atomic_update_decl) = 1;
/* The environment is passed by address in a double-sized temporary.  */
39008 tree fenv_var = create_tmp_var_raw (double_type_node);
39009 TREE_ADDRESSABLE (fenv_var) = 1;
39010 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
39012 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
39013 *clear = build_call_expr (atomic_clear_decl, 0);
39014 *update = build_call_expr (atomic_update_decl, 1,
39015 fold_convert (const_double_ptr, fenv_addr));
/* Fallback: open-code using the mffs/mtfsf builtins.  */
39020 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
39021 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
39022 tree call_mffs = build_call_expr (mffs, 0);
39024 /* Generates the equivalent of feholdexcept (&fenv_var)
39026 *fenv_var = __builtin_mffs ();
39028 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
39029 __builtin_mtfsf (0xff, fenv_hold); */
39031 /* Mask to clear everything except for the rounding modes and non-IEEE
39032 arithmetic flag. */
39033 const unsigned HOST_WIDE_INT hold_exception_mask =
39034 HOST_WIDE_INT_C (0xffffffff00000007);
39036 tree fenv_var = create_tmp_var_raw (double_type_node);
39038 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
/* Reinterpret the FPSCR image as uint64 so we can mask bits.  */
39040 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
39041 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39042 build_int_cst (uint64_type_node,
39043 hold_exception_mask));
39045 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39048 tree hold_mtfsf = build_call_expr (mtfsf, 2,
39049 build_int_cst (unsigned_type_node, 0xff),
39052 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
39054 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
39056 double fenv_clear = __builtin_mffs ();
39057 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
39058 __builtin_mtfsf (0xff, fenv_clear); */
39060 /* Mask to clear everything except for the rounding modes and non-IEEE
39061 arithmetic flag. */
39062 const unsigned HOST_WIDE_INT clear_exception_mask =
39063 HOST_WIDE_INT_C (0xffffffff00000000);
39065 tree fenv_clear = create_tmp_var_raw (double_type_node);
39067 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
39069 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
39070 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
39072 build_int_cst (uint64_type_node,
39073 clear_exception_mask));
39075 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39076 fenv_clear_llu_and);
39078 tree clear_mtfsf = build_call_expr (mtfsf, 2,
39079 build_int_cst (unsigned_type_node, 0xff),
39082 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
39084 /* Generates the equivalent of feupdateenv (&fenv_var)
39086 double old_fenv = __builtin_mffs ();
39087 double fenv_update;
39088 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
39089 (*(uint64_t*)fenv_var 0x1ff80fff);
39090 __builtin_mtfsf (0xff, fenv_update); */
39092 const unsigned HOST_WIDE_INT update_exception_mask =
39093 HOST_WIDE_INT_C (0xffffffff1fffff00);
39094 const unsigned HOST_WIDE_INT new_exception_mask =
39095 HOST_WIDE_INT_C (0x1ff80fff);
39097 tree old_fenv = create_tmp_var_raw (double_type_node);
39098 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
39100 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
39101 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
39102 build_int_cst (uint64_type_node,
39103 update_exception_mask));
/* Merge the saved exception bits back into the current environment.  */
39105 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39106 build_int_cst (uint64_type_node,
39107 new_exception_mask));
39109 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
39110 old_llu_and, new_llu_and);
39112 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39115 tree update_mtfsf = build_call_expr (mtfsf, 2,
39116 build_int_cst (unsigned_type_node, 0xff),
39117 fenv_update_mtfsf);
39119 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
39123 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
39125 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39127 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39128 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39130 /* The destination of the vmrgew instruction layout is:
39131 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
39132 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39133 vmrgew instruction will be correct. */
39134 if (BYTES_BIG_ENDIAN)
39136 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
39138 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
39143 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
39144 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
39147 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39148 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39150 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
39151 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
39153 if (BYTES_BIG_ENDIAN)
39154 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39156 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
39160 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
39162 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39164 rtx_tmp0 = gen_reg_rtx (V2DImode);
39165 rtx_tmp1 = gen_reg_rtx (V2DImode);
39167 /* The destination of the vmrgew instruction layout is:
39168 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
39169 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39170 vmrgew instruction will be correct. */
39171 if (BYTES_BIG_ENDIAN)
39173 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
39174 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
39178 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
39179 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
39182 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39183 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39185 if (signed_convert)
39187 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
39188 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
39192 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
39193 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
39196 if (BYTES_BIG_ENDIAN)
39197 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39199 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
39203 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
39206 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39208 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39209 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39211 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
39212 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
39214 rtx_tmp2 = gen_reg_rtx (V4SImode);
39215 rtx_tmp3 = gen_reg_rtx (V4SImode);
39217 if (signed_convert)
39219 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
39220 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
39224 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
39225 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
39228 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
39231 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
39234 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
39235 optimization_type opt_type)
39240 return (opt_type == OPTIMIZE_FOR_SPEED
39241 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
39248 /* Implement TARGET_CONSTANT_ALIGNMENT. */
39250 static HOST_WIDE_INT
39251 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
39253 if (TREE_CODE (exp) == STRING_CST
39254 && (STRICT_ALIGNMENT || !optimize_size))
39255 return MAX (align, BITS_PER_WORD);
39259 /* Implement TARGET_STARTING_FRAME_OFFSET. */
39261 static HOST_WIDE_INT
39262 rs6000_starting_frame_offset (void)
39264 if (FRAME_GROWS_DOWNWARD)
39266 return RS6000_STARTING_FRAME_OFFSET;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* For C++ mangled symbols ("_Z" prefix) in a module that passes IEEE
     128-bit values, also emit a weak alias under the old GCC 8.1 mangled
     name so objects built by either compiler still link.  */
  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      /* Temporarily enable the GCC 8.1 mangling, recompute the assembler
	 name, then restore the current name and mangling.  */
      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
39303 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
39304 function names from <foo>l to <foo>f128 if the default long double type is
39305 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
39306 include file switches the names on systems that support long double as IEEE
39307 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
39308 In the future, glibc will export names like __ieee128_sinf128 and we can
39309 switch to using those instead of using sinf128, which pollutes the user's
39312 This will switch the names for Fortran math functions as well (which doesn't
39313 use math.h). However, Fortran needs other changes to the compiler and
39314 library before you can switch the real*16 type at compile time.
39316 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
39317 only do this if the default is that long double is IBM extended double, and
39318 the user asked for IEEE 128-bit. */
39321 rs6000_mangle_decl_assembler_name (tree decl, tree id)
39323 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
39324 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
39326 size_t len = IDENTIFIER_LENGTH (id);
39327 const char *name = IDENTIFIER_POINTER (id);
39329 if (name[len - 1] == 'l')
39331 bool uses_ieee128_p = false;
39332 tree type = TREE_TYPE (decl);
39333 machine_mode ret_mode = TYPE_MODE (type);
39335 /* See if the function returns a IEEE 128-bit floating point type or
39337 if (ret_mode == TFmode || ret_mode == TCmode)
39338 uses_ieee128_p = true;
39341 function_args_iterator args_iter;
39344 /* See if the function passes a IEEE 128-bit floating point type
39345 or complex type. */
39346 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
39348 machine_mode arg_mode = TYPE_MODE (arg);
39349 if (arg_mode == TFmode || arg_mode == TCmode)
39351 uses_ieee128_p = true;
39357 /* If we passed or returned an IEEE 128-bit floating point type,
39358 change the name. */
39359 if (uses_ieee128_p)
39361 char *name2 = (char *) alloca (len + 4);
39362 memcpy (name2, name, len - 1);
39363 strcpy (name2 + len - 1, "f128");
39364 id = get_identifier (name2);
/* Initialize the target structure from the TARGET_* hook macros defined
   above; this single object is how the middle end reaches all rs6000
   target hooks.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots generated for this file.  */
#include "gt-rs6000.h"