1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
35 #include "stringpool.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
46 #include "fold-const.h"
48 #include "stor-layout.h"
50 #include "print-tree.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
58 #include "sched-int.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
66 #include "tm-constrs.h"
67 #include "tree-vectorizer.h"
68 #include "target-globals.h"
70 #include "tree-vector-builder.h"
72 #include "tree-pass.h"
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
77 #include "case-cfn-macros.h"
79 #include "tree-ssa-propagate.h"
81 #include "tree-ssanames.h"
82 #include "rs6000-internal.h"
84 /* This file should be included last. */
85 #include "target-def.h"
87 #ifndef TARGET_NO_PROTOTYPE
88 #define TARGET_NO_PROTOTYPE 0
91 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
92 systems will also set long double to be IEEE 128-bit. AIX and Darwin
93 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
94 those systems will not pick up this default. This needs to be after all
95 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
97 #ifndef TARGET_IEEEQUAD_DEFAULT
98 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
99 #define TARGET_IEEEQUAD_DEFAULT 1
101 #define TARGET_IEEEQUAD_DEFAULT 0
105 static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
107 /* Support targetm.vectorize.builtin_mask_for_load. */
108 static GTY(()) tree altivec_builtin_mask_for_load;
110 /* Set to nonzero once AIX common-mode calls have been defined. */
111 static GTY(()) int common_mode_defined;
114 /* Counter for labels which are to be placed in .fixup. */
115 int fixuplabelno = 0;
118 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
121 /* Specify the machine mode that pointers have. After generation of rtl, the
122 compiler makes no further distinction between pointers and any other objects
123 of this machine mode. */
124 scalar_int_mode rs6000_pmode;
127 /* Note whether IEEE 128-bit floating point was passed or returned, either as
128 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
129 floating point. We changed the default C++ mangling for these types and we
130 may want to generate a weak alias of the old mangling (U10__float128) to the
131 new mangling (u9__ieee128). */
132 static bool rs6000_passes_ieee128;
135 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
136 name used in current releases (i.e. u9__ieee128). */
137 static bool ieee128_mangling_gcc_8_1;
139 /* Width in bits of a pointer. */
140 unsigned rs6000_pointer_size;
142 #ifdef HAVE_AS_GNU_ATTRIBUTE
143 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
144 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
146 /* Flag whether floating point values have been passed/returned.
147 Note that this doesn't say whether fprs are used, since the
148 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
149 should be set for soft-float values passed in gprs and ieee128
150 values passed in vsx registers. */
151 static bool rs6000_passes_float;
152 static bool rs6000_passes_long_double;
153 /* Flag whether vector values have been passed/returned. */
154 static bool rs6000_passes_vector;
155 /* Flag whether small (<= 8 byte) structures have been returned. */
156 static bool rs6000_returns_struct;
159 /* Value is TRUE if register/mode pair is acceptable. */
160 static bool rs6000_hard_regno_mode_ok_p
161 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
163 /* Maximum number of registers needed for a given register class and mode. */
164 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
166 /* How many registers are needed for a given register and mode. */
167 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
169 /* Map register number to register class. */
170 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
172 static int dbg_cost_ctrl;
174 /* Built in types. */
175 tree rs6000_builtin_types[RS6000_BTI_MAX];
176 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
178 /* Flag to say the TOC is initialized */
179 int toc_initialized, need_toc_init;
180 char toc_label_name[10];
182 /* Cached value of rs6000_variable_issue. This is cached in
183 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
184 static short cached_can_issue_more;
186 static GTY(()) section *read_only_data_section;
187 static GTY(()) section *private_data_section;
188 static GTY(()) section *tls_data_section;
189 static GTY(()) section *tls_private_data_section;
190 static GTY(()) section *read_only_private_data_section;
191 static GTY(()) section *sdata2_section;
193 extern GTY(()) section *toc_section;
194 section *toc_section = 0;
196 struct builtin_description
198 const HOST_WIDE_INT mask;
199 const enum insn_code icode;
200 const char *const name;
201 const enum rs6000_builtins code;
204 /* Describe the vector unit used for modes. */
205 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
206 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
208 /* Register classes for various constraints that are based on the target
210 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
212 /* Describe the alignment of a vector. */
213 int rs6000_vector_align[NUM_MACHINE_MODES];
215 /* Map selected modes to types for builtins. */
216 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
218 /* What modes to automatically generate reciprocal divide estimate (fre) and
219 reciprocal sqrt (frsqrte) for. */
220 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
222 /* Masks to determine which reciprocal estimate instructions to generate
224 enum rs6000_recip_mask {
225 RECIP_SF_DIV = 0x001, /* Use divide estimate */
226 RECIP_DF_DIV = 0x002,
227 RECIP_V4SF_DIV = 0x004,
228 RECIP_V2DF_DIV = 0x008,
230 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
231 RECIP_DF_RSQRT = 0x020,
232 RECIP_V4SF_RSQRT = 0x040,
233 RECIP_V2DF_RSQRT = 0x080,
235 /* Various combination of flags for -mrecip=xxx. */
237 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
238 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
239 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
241 RECIP_HIGH_PRECISION = RECIP_ALL,
243 /* On low precision machines like the power5, don't enable double precision
244 reciprocal square root estimate, since it isn't accurate enough. */
245 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
248 /* -mrecip options. */
251 const char *string; /* option name */
252 unsigned int mask; /* mask bits to set */
253 } recip_options[] = {
254 { "all", RECIP_ALL },
255 { "none", RECIP_NONE },
256 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
258 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
259 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
260 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
261 | RECIP_V2DF_RSQRT) },
262 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
263 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
266 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
272 { "power9", PPC_PLATFORM_POWER9 },
273 { "power8", PPC_PLATFORM_POWER8 },
274 { "power7", PPC_PLATFORM_POWER7 },
275 { "power6x", PPC_PLATFORM_POWER6X },
276 { "power6", PPC_PLATFORM_POWER6 },
277 { "power5+", PPC_PLATFORM_POWER5_PLUS },
278 { "power5", PPC_PLATFORM_POWER5 },
279 { "ppc970", PPC_PLATFORM_PPC970 },
280 { "power4", PPC_PLATFORM_POWER4 },
281 { "ppca2", PPC_PLATFORM_PPCA2 },
282 { "ppc476", PPC_PLATFORM_PPC476 },
283 { "ppc464", PPC_PLATFORM_PPC464 },
284 { "ppc440", PPC_PLATFORM_PPC440 },
285 { "ppc405", PPC_PLATFORM_PPC405 },
286 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
289 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
295 } cpu_supports_info[] = {
296 /* AT_HWCAP masks. */
297 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
298 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
299 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
300 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
301 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
302 { "booke", PPC_FEATURE_BOOKE, 0 },
303 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
304 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
305 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
306 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
307 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
308 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
309 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
310 { "notb", PPC_FEATURE_NO_TB, 0 },
311 { "pa6t", PPC_FEATURE_PA6T, 0 },
312 { "power4", PPC_FEATURE_POWER4, 0 },
313 { "power5", PPC_FEATURE_POWER5, 0 },
314 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
315 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
316 { "ppc32", PPC_FEATURE_32, 0 },
317 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
318 { "ppc64", PPC_FEATURE_64, 0 },
319 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
320 { "smt", PPC_FEATURE_SMT, 0 },
321 { "spe", PPC_FEATURE_HAS_SPE, 0 },
322 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
323 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
324 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
326 /* AT_HWCAP2 masks. */
327 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
328 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
329 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
330 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
331 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
332 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
333 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
334 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
335 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
336 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
337 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
338 { "darn", PPC_FEATURE2_DARN, 1 },
339 { "scv", PPC_FEATURE2_SCV, 1 }
342 /* On PowerPC, we have a limited number of target clones that we care about
343 which means we can use an array to hold the options, rather than having more
344 elaborate data structures to identify each possible variation. Order the
345 clones from the default to the highest ISA. */
347 CLONE_DEFAULT = 0, /* default clone. */
348 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
349 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
350 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
351 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
355 /* Map compiler ISA bits into HWCAP names. */
357 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
358 const char *name; /* name to use in __builtin_cpu_supports. */
361 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
362 { 0, "" }, /* Default options. */
363 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
364 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
365 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
366 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
370 /* Newer LIBCs explicitly export this symbol to declare that they provide
371 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
372 reference to this symbol whenever we expand a CPU builtin, so that
373 we never link against an old LIBC. */
374 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
376 /* True if we have expanded a CPU builtin. */
379 /* Pointer to function (in rs6000-c.c) that can define or undefine target
380 macros that have changed. Languages that don't support the preprocessor
381 don't link in rs6000-c.c, so we can't call it directly. */
382 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
384 /* Simplify register classes into simpler classifications.  We assume
385 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
386 check for standard register classes (gpr/floating/altivec/vsx) and
387 floating/vector classes (float/altivec/vsx). */
389 enum rs6000_reg_type {
400 /* Map register class to register type. */
401 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
403 /* First/last register type for the 'normal' register types (i.e. general
404 purpose, floating point, altivec, and VSX registers). */
405 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
407 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
410 /* Register classes we care about in secondary reload or go if legitimate
411 address. We only need to worry about GPR, FPR, and Altivec registers here,
412 along an ANY field that is the OR of the 3 register classes. */
414 enum rs6000_reload_reg_type {
415 RELOAD_REG_GPR, /* General purpose registers. */
416 RELOAD_REG_FPR, /* Traditional floating point regs. */
417 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
418 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
422 /* For setting up register classes, loop through the 3 register classes mapping
423 into real registers, and skip the ANY class, which is just an OR of the
425 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
426 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
428 /* Map reload register type to a register in the register class. */
429 struct reload_reg_map_type {
430 const char *name; /* Register class name. */
431 int reg; /* Register in the register class. */
434 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
435 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
436 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
437 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
438 { "Any", -1 }, /* RELOAD_REG_ANY. */
441 /* Mask bits for each register class, indexed per mode. Historically the
442 compiler has been more restrictive about which types can do PRE_MODIFY instead of
443 PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
444 typedef unsigned char addr_mask_type;
446 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.  */
447 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
448 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
449 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
450 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
451 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
452 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
453 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
455 /* Register type masks based on the type, of valid addressing modes. */
456 struct rs6000_reg_addr {
457 enum insn_code reload_load; /* INSN to reload for loading. */
458 enum insn_code reload_store; /* INSN to reload for storing. */
459 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
460 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
461 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
462 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
463 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
466 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
468 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
470 mode_supports_pre_incdec_p (machine_mode mode)
472 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
476 /* Helper function to say whether a mode supports PRE_MODIFY. */
478 mode_supports_pre_modify_p (machine_mode mode)
480 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
484 /* Return true if we have D-form addressing in altivec registers. */
486 mode_supports_vmx_dform (machine_mode mode)
488 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
491 /* Return true if we have D-form addressing in VSX registers. This addressing
492 is more limited than normal d-form addressing in that the offset must be
493 aligned on a 16-byte boundary. */
495 mode_supports_dq_form (machine_mode mode)
497 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
501 /* Given that there exists at least one variable that is set (produced)
502 by OUT_INSN and read (consumed) by IN_INSN, return true iff
503 IN_INSN represents one or more memory store operations and none of
504 the variables set by OUT_INSN is used by IN_INSN as the address of a
505 store operation. If either IN_INSN or OUT_INSN does not represent
506 a "single" RTL SET expression (as loosely defined by the
507 implementation of the single_set function) or a PARALLEL with only
508 SETs, CLOBBERs, and USEs inside, this function returns false.
510 This rs6000-specific version of store_data_bypass_p checks for
511 certain conditions that result in assertion failures (and internal
512 compiler errors) in the generic store_data_bypass_p function and
513 returns false rather than calling store_data_bypass_p if one of the
514 problematic conditions is detected. */
517 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
524 in_set = single_set (in_insn);
527 if (MEM_P (SET_DEST (in_set)))
529 out_set = single_set (out_insn);
532 out_pat = PATTERN (out_insn);
533 if (GET_CODE (out_pat) == PARALLEL)
535 for (i = 0; i < XVECLEN (out_pat, 0); i++)
537 out_exp = XVECEXP (out_pat, 0, i);
538 if ((GET_CODE (out_exp) == CLOBBER)
539 || (GET_CODE (out_exp) == USE))
541 else if (GET_CODE (out_exp) != SET)
550 in_pat = PATTERN (in_insn);
551 if (GET_CODE (in_pat) != PARALLEL)
554 for (i = 0; i < XVECLEN (in_pat, 0); i++)
556 in_exp = XVECEXP (in_pat, 0, i);
557 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
559 else if (GET_CODE (in_exp) != SET)
562 if (MEM_P (SET_DEST (in_exp)))
564 out_set = single_set (out_insn);
567 out_pat = PATTERN (out_insn);
568 if (GET_CODE (out_pat) != PARALLEL)
570 for (j = 0; j < XVECLEN (out_pat, 0); j++)
572 out_exp = XVECEXP (out_pat, 0, j);
573 if ((GET_CODE (out_exp) == CLOBBER)
574 || (GET_CODE (out_exp) == USE))
576 else if (GET_CODE (out_exp) != SET)
583 return store_data_bypass_p (out_insn, in_insn);
587 /* Processor costs (relative to an add) */
589 const struct processor_costs *rs6000_cost;
591 /* Instruction size costs on 32bit processors. */
593 struct processor_costs size32_cost = {
594 COSTS_N_INSNS (1), /* mulsi */
595 COSTS_N_INSNS (1), /* mulsi_const */
596 COSTS_N_INSNS (1), /* mulsi_const9 */
597 COSTS_N_INSNS (1), /* muldi */
598 COSTS_N_INSNS (1), /* divsi */
599 COSTS_N_INSNS (1), /* divdi */
600 COSTS_N_INSNS (1), /* fp */
601 COSTS_N_INSNS (1), /* dmul */
602 COSTS_N_INSNS (1), /* sdiv */
603 COSTS_N_INSNS (1), /* ddiv */
604 32, /* cache line size */
608 0, /* SF->DF convert */
611 /* Instruction size costs on 64bit processors. */
613 struct processor_costs size64_cost = {
614 COSTS_N_INSNS (1), /* mulsi */
615 COSTS_N_INSNS (1), /* mulsi_const */
616 COSTS_N_INSNS (1), /* mulsi_const9 */
617 COSTS_N_INSNS (1), /* muldi */
618 COSTS_N_INSNS (1), /* divsi */
619 COSTS_N_INSNS (1), /* divdi */
620 COSTS_N_INSNS (1), /* fp */
621 COSTS_N_INSNS (1), /* dmul */
622 COSTS_N_INSNS (1), /* sdiv */
623 COSTS_N_INSNS (1), /* ddiv */
624 128, /* cache line size */
628 0, /* SF->DF convert */
631 /* Instruction costs on RS64A processors. */
633 struct processor_costs rs64a_cost = {
634 COSTS_N_INSNS (20), /* mulsi */
635 COSTS_N_INSNS (12), /* mulsi_const */
636 COSTS_N_INSNS (8), /* mulsi_const9 */
637 COSTS_N_INSNS (34), /* muldi */
638 COSTS_N_INSNS (65), /* divsi */
639 COSTS_N_INSNS (67), /* divdi */
640 COSTS_N_INSNS (4), /* fp */
641 COSTS_N_INSNS (4), /* dmul */
642 COSTS_N_INSNS (31), /* sdiv */
643 COSTS_N_INSNS (31), /* ddiv */
644 128, /* cache line size */
648 0, /* SF->DF convert */
651 /* Instruction costs on MPCCORE processors. */
653 struct processor_costs mpccore_cost = {
654 COSTS_N_INSNS (2), /* mulsi */
655 COSTS_N_INSNS (2), /* mulsi_const */
656 COSTS_N_INSNS (2), /* mulsi_const9 */
657 COSTS_N_INSNS (2), /* muldi */
658 COSTS_N_INSNS (6), /* divsi */
659 COSTS_N_INSNS (6), /* divdi */
660 COSTS_N_INSNS (4), /* fp */
661 COSTS_N_INSNS (5), /* dmul */
662 COSTS_N_INSNS (10), /* sdiv */
663 COSTS_N_INSNS (17), /* ddiv */
664 32, /* cache line size */
668 0, /* SF->DF convert */
671 /* Instruction costs on PPC403 processors. */
673 struct processor_costs ppc403_cost = {
674 COSTS_N_INSNS (4), /* mulsi */
675 COSTS_N_INSNS (4), /* mulsi_const */
676 COSTS_N_INSNS (4), /* mulsi_const9 */
677 COSTS_N_INSNS (4), /* muldi */
678 COSTS_N_INSNS (33), /* divsi */
679 COSTS_N_INSNS (33), /* divdi */
680 COSTS_N_INSNS (11), /* fp */
681 COSTS_N_INSNS (11), /* dmul */
682 COSTS_N_INSNS (11), /* sdiv */
683 COSTS_N_INSNS (11), /* ddiv */
684 32, /* cache line size */
688 0, /* SF->DF convert */
691 /* Instruction costs on PPC405 processors. */
693 struct processor_costs ppc405_cost = {
694 COSTS_N_INSNS (5), /* mulsi */
695 COSTS_N_INSNS (4), /* mulsi_const */
696 COSTS_N_INSNS (3), /* mulsi_const9 */
697 COSTS_N_INSNS (5), /* muldi */
698 COSTS_N_INSNS (35), /* divsi */
699 COSTS_N_INSNS (35), /* divdi */
700 COSTS_N_INSNS (11), /* fp */
701 COSTS_N_INSNS (11), /* dmul */
702 COSTS_N_INSNS (11), /* sdiv */
703 COSTS_N_INSNS (11), /* ddiv */
704 32, /* cache line size */
708 0, /* SF->DF convert */
711 /* Instruction costs on PPC440 processors. */
713 struct processor_costs ppc440_cost = {
714 COSTS_N_INSNS (3), /* mulsi */
715 COSTS_N_INSNS (2), /* mulsi_const */
716 COSTS_N_INSNS (2), /* mulsi_const9 */
717 COSTS_N_INSNS (3), /* muldi */
718 COSTS_N_INSNS (34), /* divsi */
719 COSTS_N_INSNS (34), /* divdi */
720 COSTS_N_INSNS (5), /* fp */
721 COSTS_N_INSNS (5), /* dmul */
722 COSTS_N_INSNS (19), /* sdiv */
723 COSTS_N_INSNS (33), /* ddiv */
724 32, /* cache line size */
728 0, /* SF->DF convert */
731 /* Instruction costs on PPC476 processors. */
733 struct processor_costs ppc476_cost = {
734 COSTS_N_INSNS (4), /* mulsi */
735 COSTS_N_INSNS (4), /* mulsi_const */
736 COSTS_N_INSNS (4), /* mulsi_const9 */
737 COSTS_N_INSNS (4), /* muldi */
738 COSTS_N_INSNS (11), /* divsi */
739 COSTS_N_INSNS (11), /* divdi */
740 COSTS_N_INSNS (6), /* fp */
741 COSTS_N_INSNS (6), /* dmul */
742 COSTS_N_INSNS (19), /* sdiv */
743 COSTS_N_INSNS (33), /* ddiv */
744 32, /* l1 cache line size */
748 0, /* SF->DF convert */
751 /* Instruction costs on PPC601 processors. */
753 struct processor_costs ppc601_cost = {
754 COSTS_N_INSNS (5), /* mulsi */
755 COSTS_N_INSNS (5), /* mulsi_const */
756 COSTS_N_INSNS (5), /* mulsi_const9 */
757 COSTS_N_INSNS (5), /* muldi */
758 COSTS_N_INSNS (36), /* divsi */
759 COSTS_N_INSNS (36), /* divdi */
760 COSTS_N_INSNS (4), /* fp */
761 COSTS_N_INSNS (5), /* dmul */
762 COSTS_N_INSNS (17), /* sdiv */
763 COSTS_N_INSNS (31), /* ddiv */
764 32, /* cache line size */
768 0, /* SF->DF convert */
771 /* Instruction costs on PPC603 processors. */
773 struct processor_costs ppc603_cost = {
774 COSTS_N_INSNS (5), /* mulsi */
775 COSTS_N_INSNS (3), /* mulsi_const */
776 COSTS_N_INSNS (2), /* mulsi_const9 */
777 COSTS_N_INSNS (5), /* muldi */
778 COSTS_N_INSNS (37), /* divsi */
779 COSTS_N_INSNS (37), /* divdi */
780 COSTS_N_INSNS (3), /* fp */
781 COSTS_N_INSNS (4), /* dmul */
782 COSTS_N_INSNS (18), /* sdiv */
783 COSTS_N_INSNS (33), /* ddiv */
784 32, /* cache line size */
788 0, /* SF->DF convert */
791 /* Instruction costs on PPC604 processors. */
793 struct processor_costs ppc604_cost = {
794 COSTS_N_INSNS (4), /* mulsi */
795 COSTS_N_INSNS (4), /* mulsi_const */
796 COSTS_N_INSNS (4), /* mulsi_const9 */
797 COSTS_N_INSNS (4), /* muldi */
798 COSTS_N_INSNS (20), /* divsi */
799 COSTS_N_INSNS (20), /* divdi */
800 COSTS_N_INSNS (3), /* fp */
801 COSTS_N_INSNS (3), /* dmul */
802 COSTS_N_INSNS (18), /* sdiv */
803 COSTS_N_INSNS (32), /* ddiv */
804 32, /* cache line size */
808 0, /* SF->DF convert */
811 /* Instruction costs on PPC604e processors. */
813 struct processor_costs ppc604e_cost = {
814 COSTS_N_INSNS (2), /* mulsi */
815 COSTS_N_INSNS (2), /* mulsi_const */
816 COSTS_N_INSNS (2), /* mulsi_const9 */
817 COSTS_N_INSNS (2), /* muldi */
818 COSTS_N_INSNS (20), /* divsi */
819 COSTS_N_INSNS (20), /* divdi */
820 COSTS_N_INSNS (3), /* fp */
821 COSTS_N_INSNS (3), /* dmul */
822 COSTS_N_INSNS (18), /* sdiv */
823 COSTS_N_INSNS (32), /* ddiv */
824 32, /* cache line size */
828 0, /* SF->DF convert */
831 /* Instruction costs on PPC620 processors. */
833 struct processor_costs ppc620_cost = {
834 COSTS_N_INSNS (5), /* mulsi */
835 COSTS_N_INSNS (4), /* mulsi_const */
836 COSTS_N_INSNS (3), /* mulsi_const9 */
837 COSTS_N_INSNS (7), /* muldi */
838 COSTS_N_INSNS (21), /* divsi */
839 COSTS_N_INSNS (37), /* divdi */
840 COSTS_N_INSNS (3), /* fp */
841 COSTS_N_INSNS (3), /* dmul */
842 COSTS_N_INSNS (18), /* sdiv */
843 COSTS_N_INSNS (32), /* ddiv */
844 128, /* cache line size */
848 0, /* SF->DF convert */
851 /* Instruction costs on PPC630 processors. */
853 struct processor_costs ppc630_cost = {
854 COSTS_N_INSNS (5), /* mulsi */
855 COSTS_N_INSNS (4), /* mulsi_const */
856 COSTS_N_INSNS (3), /* mulsi_const9 */
857 COSTS_N_INSNS (7), /* muldi */
858 COSTS_N_INSNS (21), /* divsi */
859 COSTS_N_INSNS (37), /* divdi */
860 COSTS_N_INSNS (3), /* fp */
861 COSTS_N_INSNS (3), /* dmul */
862 COSTS_N_INSNS (17), /* sdiv */
863 COSTS_N_INSNS (21), /* ddiv */
864 128, /* cache line size */
868 0, /* SF->DF convert */
871 /* Instruction costs on Cell processor. */
872 /* COSTS_N_INSNS (1) ~ one add. */
874 struct processor_costs ppccell_cost = {
875 COSTS_N_INSNS (9/2)+2, /* mulsi */
876 COSTS_N_INSNS (6/2), /* mulsi_const */
877 COSTS_N_INSNS (6/2), /* mulsi_const9 */
878 COSTS_N_INSNS (15/2)+2, /* muldi */
879 COSTS_N_INSNS (38/2), /* divsi */
880 COSTS_N_INSNS (70/2), /* divdi */
881 COSTS_N_INSNS (10/2), /* fp */
882 COSTS_N_INSNS (10/2), /* dmul */
883 COSTS_N_INSNS (74/2), /* sdiv */
884 COSTS_N_INSNS (74/2), /* ddiv */
885 128, /* cache line size */
889 0, /* SF->DF convert */
892 /* Instruction costs on PPC750 and PPC7400 processors. */
894 struct processor_costs ppc750_cost = {
895 COSTS_N_INSNS (5), /* mulsi */
896 COSTS_N_INSNS (3), /* mulsi_const */
897 COSTS_N_INSNS (2), /* mulsi_const9 */
898 COSTS_N_INSNS (5), /* muldi */
899 COSTS_N_INSNS (17), /* divsi */
900 COSTS_N_INSNS (17), /* divdi */
901 COSTS_N_INSNS (3), /* fp */
902 COSTS_N_INSNS (3), /* dmul */
903 COSTS_N_INSNS (17), /* sdiv */
904 COSTS_N_INSNS (31), /* ddiv */
905 32, /* cache line size */
909 0, /* SF->DF convert */
912 /* Instruction costs on PPC7450 processors. */
914 struct processor_costs ppc7450_cost = {
915 COSTS_N_INSNS (4), /* mulsi */
916 COSTS_N_INSNS (3), /* mulsi_const */
917 COSTS_N_INSNS (3), /* mulsi_const9 */
918 COSTS_N_INSNS (4), /* muldi */
919 COSTS_N_INSNS (23), /* divsi */
920 COSTS_N_INSNS (23), /* divdi */
921 COSTS_N_INSNS (5), /* fp */
922 COSTS_N_INSNS (5), /* dmul */
923 COSTS_N_INSNS (21), /* sdiv */
924 COSTS_N_INSNS (35), /* ddiv */
925 32, /* cache line size */
929 0, /* SF->DF convert */
932 /* Instruction costs on PPC8540 processors. */
934 struct processor_costs ppc8540_cost = {
935 COSTS_N_INSNS (4), /* mulsi */
936 COSTS_N_INSNS (4), /* mulsi_const */
937 COSTS_N_INSNS (4), /* mulsi_const9 */
938 COSTS_N_INSNS (4), /* muldi */
939 COSTS_N_INSNS (19), /* divsi */
940 COSTS_N_INSNS (19), /* divdi */
941 COSTS_N_INSNS (4), /* fp */
942 COSTS_N_INSNS (4), /* dmul */
943 COSTS_N_INSNS (29), /* sdiv */
944 COSTS_N_INSNS (29), /* ddiv */
945 32, /* cache line size */
948 1, /* prefetch streams /*/
949 0, /* SF->DF convert */
952 /* Instruction costs on E300C2 and E300C3 cores. */
954 struct processor_costs ppce300c2c3_cost = {
955 COSTS_N_INSNS (4), /* mulsi */
956 COSTS_N_INSNS (4), /* mulsi_const */
957 COSTS_N_INSNS (4), /* mulsi_const9 */
958 COSTS_N_INSNS (4), /* muldi */
959 COSTS_N_INSNS (19), /* divsi */
960 COSTS_N_INSNS (19), /* divdi */
961 COSTS_N_INSNS (3), /* fp */
962 COSTS_N_INSNS (4), /* dmul */
963 COSTS_N_INSNS (18), /* sdiv */
964 COSTS_N_INSNS (33), /* ddiv */
968 1, /* prefetch streams /*/
969 0, /* SF->DF convert */
972 /* Instruction costs on PPCE500MC processors. */
974 struct processor_costs ppce500mc_cost = {
975 COSTS_N_INSNS (4), /* mulsi */
976 COSTS_N_INSNS (4), /* mulsi_const */
977 COSTS_N_INSNS (4), /* mulsi_const9 */
978 COSTS_N_INSNS (4), /* muldi */
979 COSTS_N_INSNS (14), /* divsi */
980 COSTS_N_INSNS (14), /* divdi */
981 COSTS_N_INSNS (8), /* fp */
982 COSTS_N_INSNS (10), /* dmul */
983 COSTS_N_INSNS (36), /* sdiv */
984 COSTS_N_INSNS (66), /* ddiv */
985 64, /* cache line size */
988 1, /* prefetch streams /*/
989 0, /* SF->DF convert */
992 /* Instruction costs on PPCE500MC64 processors. */
994 struct processor_costs ppce500mc64_cost = {
995 COSTS_N_INSNS (4), /* mulsi */
996 COSTS_N_INSNS (4), /* mulsi_const */
997 COSTS_N_INSNS (4), /* mulsi_const9 */
998 COSTS_N_INSNS (4), /* muldi */
999 COSTS_N_INSNS (14), /* divsi */
1000 COSTS_N_INSNS (14), /* divdi */
1001 COSTS_N_INSNS (4), /* fp */
1002 COSTS_N_INSNS (10), /* dmul */
1003 COSTS_N_INSNS (36), /* sdiv */
1004 COSTS_N_INSNS (66), /* ddiv */
1005 64, /* cache line size */
1008 1, /* prefetch streams /*/
1009 0, /* SF->DF convert */
1012 /* Instruction costs on PPCE5500 processors. */
1014 struct processor_costs ppce5500_cost = {
1015 COSTS_N_INSNS (5), /* mulsi */
1016 COSTS_N_INSNS (5), /* mulsi_const */
1017 COSTS_N_INSNS (4), /* mulsi_const9 */
1018 COSTS_N_INSNS (5), /* muldi */
1019 COSTS_N_INSNS (14), /* divsi */
1020 COSTS_N_INSNS (14), /* divdi */
1021 COSTS_N_INSNS (7), /* fp */
1022 COSTS_N_INSNS (10), /* dmul */
1023 COSTS_N_INSNS (36), /* sdiv */
1024 COSTS_N_INSNS (66), /* ddiv */
1025 64, /* cache line size */
1028 1, /* prefetch streams /*/
1029 0, /* SF->DF convert */
1032 /* Instruction costs on PPCE6500 processors. */
1034 struct processor_costs ppce6500_cost = {
1035 COSTS_N_INSNS (5), /* mulsi */
1036 COSTS_N_INSNS (5), /* mulsi_const */
1037 COSTS_N_INSNS (4), /* mulsi_const9 */
1038 COSTS_N_INSNS (5), /* muldi */
1039 COSTS_N_INSNS (14), /* divsi */
1040 COSTS_N_INSNS (14), /* divdi */
1041 COSTS_N_INSNS (7), /* fp */
1042 COSTS_N_INSNS (10), /* dmul */
1043 COSTS_N_INSNS (36), /* sdiv */
1044 COSTS_N_INSNS (66), /* ddiv */
1045 64, /* cache line size */
1048 1, /* prefetch streams /*/
1049 0, /* SF->DF convert */
1052 /* Instruction costs on AppliedMicro Titan processors. */
1054 struct processor_costs titan_cost = {
1055 COSTS_N_INSNS (5), /* mulsi */
1056 COSTS_N_INSNS (5), /* mulsi_const */
1057 COSTS_N_INSNS (5), /* mulsi_const9 */
1058 COSTS_N_INSNS (5), /* muldi */
1059 COSTS_N_INSNS (18), /* divsi */
1060 COSTS_N_INSNS (18), /* divdi */
1061 COSTS_N_INSNS (10), /* fp */
1062 COSTS_N_INSNS (10), /* dmul */
1063 COSTS_N_INSNS (46), /* sdiv */
1064 COSTS_N_INSNS (72), /* ddiv */
1065 32, /* cache line size */
1068 1, /* prefetch streams /*/
1069 0, /* SF->DF convert */
1072 /* Instruction costs on POWER4 and POWER5 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters. */
1074 struct processor_costs power4_cost = {
1075 COSTS_N_INSNS (3), /* mulsi */
1076 COSTS_N_INSNS (2), /* mulsi_const */
1077 COSTS_N_INSNS (2), /* mulsi_const9 */
1078 COSTS_N_INSNS (4), /* muldi */
1079 COSTS_N_INSNS (18), /* divsi */
1080 COSTS_N_INSNS (34), /* divdi */
1081 COSTS_N_INSNS (3), /* fp */
1082 COSTS_N_INSNS (3), /* dmul */
1083 COSTS_N_INSNS (17), /* sdiv */
1084 COSTS_N_INSNS (17), /* ddiv */
1085 128, /* cache line size */
1087 1024, /* l2 cache */
1088 8, /* prefetch streams */
1089 0, /* SF->DF convert */
1092 /* Instruction costs on POWER6 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters. */
1094 struct processor_costs power6_cost = {
1095 COSTS_N_INSNS (8), /* mulsi */
1096 COSTS_N_INSNS (8), /* mulsi_const */
1097 COSTS_N_INSNS (8), /* mulsi_const9 */
1098 COSTS_N_INSNS (8), /* muldi */
1099 COSTS_N_INSNS (22), /* divsi */
1100 COSTS_N_INSNS (28), /* divdi */
1101 COSTS_N_INSNS (3), /* fp */
1102 COSTS_N_INSNS (3), /* dmul */
1103 COSTS_N_INSNS (13), /* sdiv */
1104 COSTS_N_INSNS (16), /* ddiv */
1105 128, /* cache line size */
1107 2048, /* l2 cache */
1108 16, /* prefetch streams */
1109 0, /* SF->DF convert */
1112 /* Instruction costs on POWER7 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters.
   Unlike older tables, SF->DF convert has a real (non-zero) cost here. */
1114 struct processor_costs power7_cost = {
1115 COSTS_N_INSNS (2), /* mulsi */
1116 COSTS_N_INSNS (2), /* mulsi_const */
1117 COSTS_N_INSNS (2), /* mulsi_const9 */
1118 COSTS_N_INSNS (2), /* muldi */
1119 COSTS_N_INSNS (18), /* divsi */
1120 COSTS_N_INSNS (34), /* divdi */
1121 COSTS_N_INSNS (3), /* fp */
1122 COSTS_N_INSNS (3), /* dmul */
1123 COSTS_N_INSNS (13), /* sdiv */
1124 COSTS_N_INSNS (16), /* ddiv */
1125 128, /* cache line size */
1128 12, /* prefetch streams */
1129 COSTS_N_INSNS (3), /* SF->DF convert */
1132 /* Instruction costs on POWER8 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters. */
1134 struct processor_costs power8_cost = {
1135 COSTS_N_INSNS (3), /* mulsi */
1136 COSTS_N_INSNS (3), /* mulsi_const */
1137 COSTS_N_INSNS (3), /* mulsi_const9 */
1138 COSTS_N_INSNS (3), /* muldi */
1139 COSTS_N_INSNS (19), /* divsi */
1140 COSTS_N_INSNS (35), /* divdi */
1141 COSTS_N_INSNS (3), /* fp */
1142 COSTS_N_INSNS (3), /* dmul */
1143 COSTS_N_INSNS (14), /* sdiv */
1144 COSTS_N_INSNS (17), /* ddiv */
1145 128, /* cache line size */
1148 12, /* prefetch streams */
1149 COSTS_N_INSNS (3), /* SF->DF convert */
1152 /* Instruction costs on POWER9 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters.
   Note the much cheaper integer divide relative to POWER8. */
1154 struct processor_costs power9_cost = {
1155 COSTS_N_INSNS (3), /* mulsi */
1156 COSTS_N_INSNS (3), /* mulsi_const */
1157 COSTS_N_INSNS (3), /* mulsi_const9 */
1158 COSTS_N_INSNS (3), /* muldi */
1159 COSTS_N_INSNS (8), /* divsi */
1160 COSTS_N_INSNS (12), /* divdi */
1161 COSTS_N_INSNS (3), /* fp */
1162 COSTS_N_INSNS (3), /* dmul */
1163 COSTS_N_INSNS (13), /* sdiv */
1164 COSTS_N_INSNS (18), /* ddiv */
1165 128, /* cache line size */
1168 8, /* prefetch streams */
1169 COSTS_N_INSNS (3), /* SF->DF convert */
1172 /* Instruction costs on POWER A2 processors. */
/* Entries are COSTS_N_INSNS latencies, then cache/prefetch parameters.
   NOTE(review): the "cache line size" / "l1 cache" initializer lines were
   dropped by extraction here — verify against the original file. */
1174 struct processor_costs ppca2_cost = {
1175 COSTS_N_INSNS (16), /* mulsi */
1176 COSTS_N_INSNS (16), /* mulsi_const */
1177 COSTS_N_INSNS (16), /* mulsi_const9 */
1178 COSTS_N_INSNS (16), /* muldi */
1179 COSTS_N_INSNS (22), /* divsi */
1180 COSTS_N_INSNS (28), /* divdi */
1181 COSTS_N_INSNS (3), /* fp */
1182 COSTS_N_INSNS (3), /* dmul */
1183 COSTS_N_INSNS (59), /* sdiv */
1184 COSTS_N_INSNS (72), /* ddiv */
1187 2048, /* l2 cache */
1188 16, /* prefetch streams */
1189 0, /* SF->DF convert */
1193 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
/* X-macro pattern: rs6000-builtin.def invokes RS6000_BUILTIN_<kind> once per
   builtin.  Here each macro is (re)defined to expand to one initializer for
   rs6000_builtin_info[], so including the .def file below materializes the
   whole table.  The macros are #undef'd first (and again afterwards) because
   other inclusions of the .def file elsewhere define them differently. */
1194 #undef RS6000_BUILTIN_0
1195 #undef RS6000_BUILTIN_1
1196 #undef RS6000_BUILTIN_2
1197 #undef RS6000_BUILTIN_3
1198 #undef RS6000_BUILTIN_A
1199 #undef RS6000_BUILTIN_D
1200 #undef RS6000_BUILTIN_H
1201 #undef RS6000_BUILTIN_P
1202 #undef RS6000_BUILTIN_X
/* All kinds expand identically here; the kind distinction matters only to
   other consumers of rs6000-builtin.def. */
1204 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1205 { NAME, ICODE, MASK, ATTR },
1207 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1208 { NAME, ICODE, MASK, ATTR },
1210 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1211 { NAME, ICODE, MASK, ATTR },
1213 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1214 { NAME, ICODE, MASK, ATTR },
1216 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1217 { NAME, ICODE, MASK, ATTR },
1219 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1220 { NAME, ICODE, MASK, ATTR },
1222 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1223 { NAME, ICODE, MASK, ATTR },
1225 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1226 { NAME, ICODE, MASK, ATTR },
1228 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1229 { NAME, ICODE, MASK, ATTR },
/* NOTE(review): the initializers above start with NAME, so the struct's first
   member (a name string) appears to have been dropped by extraction — confirm
   against the original file. */
1231 struct rs6000_builtin_info_type {
1233 const enum insn_code icode;
1234 const HOST_WIDE_INT mask;
1235 const unsigned attr;
1238 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1240 #include "rs6000-builtin.def"
/* Clean up so later inclusions of rs6000-builtin.def can redefine these. */
1243 #undef RS6000_BUILTIN_0
1244 #undef RS6000_BUILTIN_1
1245 #undef RS6000_BUILTIN_2
1246 #undef RS6000_BUILTIN_3
1247 #undef RS6000_BUILTIN_A
1248 #undef RS6000_BUILTIN_D
1249 #undef RS6000_BUILTIN_H
1250 #undef RS6000_BUILTIN_P
1251 #undef RS6000_BUILTIN_X
1253 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1254 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
/* Forward declarations for static helpers defined later in this file.
   NOTE(review): the rs6000_debug_* entries look like tracing wrappers of the
   corresponding non-debug hooks — confirm where the *_ptr variables below are
   switched (likely in option override code outside this chunk). */
1257 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1258 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1259 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1260 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1261 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1262 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1263 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1264 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1265 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1267 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
/* Scheduler helpers: classify instructions for dispatch-group formation. */
1269 static bool is_microcoded_insn (rtx_insn *);
1270 static bool is_nonpipeline_insn (rtx_insn *);
1271 static bool is_cracked_insn (rtx_insn *);
1272 static bool is_load_insn (rtx, rtx *);
1273 static bool is_store_insn (rtx, rtx *);
1274 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1275 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1276 static bool insn_must_be_first_in_group (rtx_insn *);
1277 static bool insn_must_be_last_in_group (rtx_insn *);
/* Builtin initialization helpers. */
1278 static void altivec_init_builtins (void);
1279 static tree builtin_function_type (machine_mode, machine_mode,
1280 machine_mode, machine_mode,
1281 enum rs6000_builtins, const char *name);
1282 static void rs6000_common_init_builtins (void);
1283 static void htm_init_builtins (void);
1284 int easy_vector_constant (rtx, machine_mode);
1285 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1286 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1287 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1290 static tree get_prev_label (tree);
1292 static bool rs6000_mode_dependent_address (const_rtx);
1293 static bool rs6000_debug_mode_dependent_address (const_rtx);
1294 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1295 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1297 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1300 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1301 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1303 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1306 static bool rs6000_debug_can_change_mode_class (machine_mode,
1309 static rtx rs6000_internal_arg_pointer (void);
/* Indirection pointers, initialized to the non-debug implementations; the
   debug variants can be installed through these without changing callers. */
1311 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1312 = rs6000_mode_dependent_address;
1314 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1316 = rs6000_secondary_reload_class;
1318 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1319 = rs6000_preferred_reload_class;
/* Sentinel insn-cost value meaning "no cost information available". */
1321 const int INSN_NOT_AVAILABLE = -1;
1323 static void rs6000_print_isa_options (FILE *, int, const char *,
1325 static void rs6000_print_builtin_options (FILE *, int, const char *,
1327 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1329 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1330 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1331 enum rs6000_reg_type,
1333 secondary_reload_info *,
1335 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1336 static tree rs6000_fold_builtin (tree, int, tree *, bool);
1338 /* Hash table stuff for keeping track of TOC entries. */
/* GTY((for_user)) marks these structs for garbage-collection roots managed
   through the user-visible hash tables below. */
1340 struct GTY((for_user)) toc_hash_struct
1342 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1343 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1345 machine_mode key_mode;
/* Hasher traits class: supplies hash/equal for hash_table<toc_hasher>. */
1349 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1351 static hashval_t hash (toc_hash_struct *);
1352 static bool equal (toc_hash_struct *, toc_hash_struct *);
1355 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1357 /* Hash table to keep track of the argument types for builtin functions. */
1359 struct GTY((for_user)) builtin_hash_struct
1362 machine_mode mode[4]; /* return value + 3 arguments. */
1363 unsigned char uns_p[4]; /* and whether the types are unsigned. */
/* Hasher traits class for the builtin-argument-type table. */
1366 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1368 static hashval_t hash (builtin_hash_struct *);
1369 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1372 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1375 /* Default register names. */
/* Register groups follow the same order as alt_reg_names below (whose
   %r/%f/%v/%cr prefixes identify them): 32 GPRs, 32 FPRs, 32 AltiVec VRs,
   then lr/ctr/ca/ap, 8 condition registers, and vrsave/vscr/sfp. */
1376 char rs6000_reg_names[][8] =
/* GPRs 0-31.  */
1379 "0", "1", "2", "3", "4", "5", "6", "7",
1380 "8", "9", "10", "11", "12", "13", "14", "15",
1381 "16", "17", "18", "19", "20", "21", "22", "23",
1382 "24", "25", "26", "27", "28", "29", "30", "31",
/* FPRs 0-31.  */
1384 "0", "1", "2", "3", "4", "5", "6", "7",
1385 "8", "9", "10", "11", "12", "13", "14", "15",
1386 "16", "17", "18", "19", "20", "21", "22", "23",
1387 "24", "25", "26", "27", "28", "29", "30", "31",
/* AltiVec VRs 0-31.  */
1389 "0", "1", "2", "3", "4", "5", "6", "7",
1390 "8", "9", "10", "11", "12", "13", "14", "15",
1391 "16", "17", "18", "19", "20", "21", "22", "23",
1392 "24", "25", "26", "27", "28", "29", "30", "31",
1394 "lr", "ctr", "ca", "ap",
/* Condition registers cr0-cr7.  */
1396 "0", "1", "2", "3", "4", "5", "6", "7",
1397 /* vrsave vscr sfp */
1398 "vrsave", "vscr", "sfp",
/* With -mregnames, emit the assembler's symbolic register names instead. */
1401 #ifdef TARGET_REGNAMES
1402 static const char alt_reg_names[][8] =
1405 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1406 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1407 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1408 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1410 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1411 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1412 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1413 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1415 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1416 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1417 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1418 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1420 "lr", "ctr", "ca", "ap",
1422 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1423 /* vrsave vscr sfp */
1424 "vrsave", "vscr", "sfp",
1428 /* Table of valid machine attributes. */
1430 static const struct attribute_spec rs6000_attribute_table[] =
1432 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1433 affects_type_identity, handler, exclude } */
1434 { "altivec", 1, 1, false, true, false, false,
1435 rs6000_handle_altivec_attribute, NULL },
1436 { "longcall", 0, 0, false, true, true, false,
1437 rs6000_handle_longcall_attribute, NULL },
/* "shortcall" shares the longcall handler, which distinguishes the two. */
1438 { "shortcall", 0, 0, false, true, true, false,
1439 rs6000_handle_longcall_attribute, NULL },
1440 { "ms_struct", 0, 0, false, false, false, false,
1441 rs6000_handle_struct_attribute, NULL },
1442 { "gcc_struct", 0, 0, false, false, false, false,
1443 rs6000_handle_struct_attribute, NULL },
/* Subtargets (e.g. Darwin) may append extra attribute entries here. */
1444 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1445 SUBTARGET_ATTRIBUTE_TABLE,
/* All-NULL sentinel terminates the table. */
1447 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1450 #ifndef TARGET_PROFILE_KERNEL
1451 #define TARGET_PROFILE_KERNEL 0
1454 /* Initialize the GCC target structure. */
1455 #undef TARGET_ATTRIBUTE_TABLE
1456 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1457 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1458 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1459 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1460 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1462 #undef TARGET_ASM_ALIGNED_DI_OP
1463 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1465 /* Default unaligned ops are only provided for ELF. Find the ops needed
1466 for non-ELF systems. */
1467 #ifndef OBJECT_FORMAT_ELF
1469 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1471 #undef TARGET_ASM_UNALIGNED_HI_OP
1472 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1473 #undef TARGET_ASM_UNALIGNED_SI_OP
1474 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1475 #undef TARGET_ASM_UNALIGNED_DI_OP
1476 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1479 #undef TARGET_ASM_UNALIGNED_HI_OP
1480 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1481 #undef TARGET_ASM_UNALIGNED_SI_OP
1482 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1483 #undef TARGET_ASM_UNALIGNED_DI_OP
1484 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1485 #undef TARGET_ASM_ALIGNED_DI_OP
1486 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1490 /* This hook deals with fixups for relocatable code and DI-mode objects
1492 #undef TARGET_ASM_INTEGER
1493 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1495 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1496 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1497 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1500 #undef TARGET_SET_UP_BY_PROLOGUE
1501 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1503 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1504 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1505 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1506 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1507 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1508 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1509 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1510 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1511 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1512 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1513 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1514 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1516 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1517 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1519 #undef TARGET_INTERNAL_ARG_POINTER
1520 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1522 #undef TARGET_HAVE_TLS
1523 #define TARGET_HAVE_TLS HAVE_AS_TLS
1525 #undef TARGET_CANNOT_FORCE_CONST_MEM
1526 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1528 #undef TARGET_DELEGITIMIZE_ADDRESS
1529 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1531 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1532 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1534 #undef TARGET_LEGITIMATE_COMBINED_INSN
1535 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1537 #undef TARGET_ASM_FUNCTION_PROLOGUE
1538 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1539 #undef TARGET_ASM_FUNCTION_EPILOGUE
1540 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1542 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1543 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1545 #undef TARGET_LEGITIMIZE_ADDRESS
1546 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1548 #undef TARGET_SCHED_VARIABLE_ISSUE
1549 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1551 #undef TARGET_SCHED_ISSUE_RATE
1552 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1553 #undef TARGET_SCHED_ADJUST_COST
1554 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1555 #undef TARGET_SCHED_ADJUST_PRIORITY
1556 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1557 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1558 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1559 #undef TARGET_SCHED_INIT
1560 #define TARGET_SCHED_INIT rs6000_sched_init
1561 #undef TARGET_SCHED_FINISH
1562 #define TARGET_SCHED_FINISH rs6000_sched_finish
1563 #undef TARGET_SCHED_REORDER
1564 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1565 #undef TARGET_SCHED_REORDER2
1566 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1568 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1569 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1571 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1572 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1574 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1575 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1576 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1577 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1578 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1579 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1580 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1581 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1583 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1584 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1586 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1587 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1588 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1589 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1590 rs6000_builtin_support_vector_misalignment
1591 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1592 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1593 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1594 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1595 rs6000_builtin_vectorization_cost
1596 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1597 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1598 rs6000_preferred_simd_mode
1599 #undef TARGET_VECTORIZE_INIT_COST
1600 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1601 #undef TARGET_VECTORIZE_ADD_STMT_COST
1602 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1603 #undef TARGET_VECTORIZE_FINISH_COST
1604 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1605 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1606 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1608 #undef TARGET_INIT_BUILTINS
1609 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1610 #undef TARGET_BUILTIN_DECL
1611 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1613 #undef TARGET_FOLD_BUILTIN
1614 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1615 #undef TARGET_GIMPLE_FOLD_BUILTIN
1616 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1618 #undef TARGET_EXPAND_BUILTIN
1619 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1621 #undef TARGET_MANGLE_TYPE
1622 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1624 #undef TARGET_INIT_LIBFUNCS
1625 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1628 #undef TARGET_BINDS_LOCAL_P
1629 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1632 #undef TARGET_MS_BITFIELD_LAYOUT_P
1633 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1635 #undef TARGET_ASM_OUTPUT_MI_THUNK
1636 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1638 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1639 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1641 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1642 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1644 #undef TARGET_REGISTER_MOVE_COST
1645 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1646 #undef TARGET_MEMORY_MOVE_COST
1647 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1648 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1649 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1650 rs6000_ira_change_pseudo_allocno_class
1651 #undef TARGET_CANNOT_COPY_INSN_P
1652 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1653 #undef TARGET_RTX_COSTS
1654 #define TARGET_RTX_COSTS rs6000_rtx_costs
1655 #undef TARGET_ADDRESS_COST
1656 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1657 #undef TARGET_INSN_COST
1658 #define TARGET_INSN_COST rs6000_insn_cost
1660 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1661 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1663 #undef TARGET_PROMOTE_FUNCTION_MODE
1664 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1666 #undef TARGET_RETURN_IN_MEMORY
1667 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1669 #undef TARGET_RETURN_IN_MSB
1670 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1672 #undef TARGET_SETUP_INCOMING_VARARGS
1673 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1675 /* Always strict argument naming on rs6000. */
1676 #undef TARGET_STRICT_ARGUMENT_NAMING
1677 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1678 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1679 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1680 #undef TARGET_SPLIT_COMPLEX_ARG
1681 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1682 #undef TARGET_MUST_PASS_IN_STACK
1683 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1684 #undef TARGET_PASS_BY_REFERENCE
1685 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1686 #undef TARGET_ARG_PARTIAL_BYTES
1687 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1688 #undef TARGET_FUNCTION_ARG_ADVANCE
1689 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1690 #undef TARGET_FUNCTION_ARG
1691 #define TARGET_FUNCTION_ARG rs6000_function_arg
1692 #undef TARGET_FUNCTION_ARG_PADDING
1693 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1694 #undef TARGET_FUNCTION_ARG_BOUNDARY
1695 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1697 #undef TARGET_BUILD_BUILTIN_VA_LIST
1698 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1700 #undef TARGET_EXPAND_BUILTIN_VA_START
1701 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1703 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1704 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1706 #undef TARGET_EH_RETURN_FILTER_MODE
1707 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1709 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1710 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1712 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1713 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1715 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1716 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1718 #undef TARGET_FLOATN_MODE
1719 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1721 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1722 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1724 #undef TARGET_MD_ASM_ADJUST
1725 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1727 #undef TARGET_OPTION_OVERRIDE
1728 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1730 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1731 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1732 rs6000_builtin_vectorized_function
1734 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1735 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1736 rs6000_builtin_md_vectorized_function
1738 #undef TARGET_STACK_PROTECT_GUARD
1739 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1742 #undef TARGET_STACK_PROTECT_FAIL
1743 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1747 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1748 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1751 /* Use a 32-bit anchor range. This leads to sequences like:
1753 addis tmp,anchor,high
1756 where tmp itself acts as an anchor, and can be shared between
1757 accesses to the same 64k page. */
1758 #undef TARGET_MIN_ANCHOR_OFFSET
1759 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1760 #undef TARGET_MAX_ANCHOR_OFFSET
1761 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1762 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1763 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1764 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1765 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1767 #undef TARGET_BUILTIN_RECIPROCAL
1768 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1770 #undef TARGET_SECONDARY_RELOAD
1771 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1772 #undef TARGET_SECONDARY_MEMORY_NEEDED
1773 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1774 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1775 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1777 #undef TARGET_LEGITIMATE_ADDRESS_P
1778 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1780 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1781 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1783 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1784 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1786 #undef TARGET_CAN_ELIMINATE
1787 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1789 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1790 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1792 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1793 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1795 #undef TARGET_TRAMPOLINE_INIT
1796 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1798 #undef TARGET_FUNCTION_VALUE
1799 #define TARGET_FUNCTION_VALUE rs6000_function_value
1801 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1802 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1804 #undef TARGET_OPTION_SAVE
1805 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1807 #undef TARGET_OPTION_RESTORE
1808 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1810 #undef TARGET_OPTION_PRINT
1811 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1813 #undef TARGET_CAN_INLINE_P
1814 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1816 #undef TARGET_SET_CURRENT_FUNCTION
1817 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1819 #undef TARGET_LEGITIMATE_CONSTANT_P
1820 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1822 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1823 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1825 #undef TARGET_CAN_USE_DOLOOP_P
1826 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1828 #undef TARGET_PREDICT_DOLOOP_P
1829 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1831 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1832 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1834 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1835 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1836 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1837 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1838 #undef TARGET_UNWIND_WORD_MODE
1839 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1841 #undef TARGET_OFFLOAD_OPTIONS
1842 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1844 #undef TARGET_C_MODE_FOR_SUFFIX
1845 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1847 #undef TARGET_INVALID_BINARY_OP
1848 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1850 #undef TARGET_OPTAB_SUPPORTED_P
1851 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1853 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1854 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1856 #undef TARGET_COMPARE_VERSION_PRIORITY
1857 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1859 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1860 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1861 rs6000_generate_version_dispatcher_body
1863 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1864 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1865 rs6000_get_function_versions_dispatcher
1867 #undef TARGET_OPTION_FUNCTION_VERSIONS
1868 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1870 #undef TARGET_HARD_REGNO_NREGS
1871 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1872 #undef TARGET_HARD_REGNO_MODE_OK
1873 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1875 #undef TARGET_MODES_TIEABLE_P
1876 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1878 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1879 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1880 rs6000_hard_regno_call_part_clobbered
1882 #undef TARGET_SLOW_UNALIGNED_ACCESS
1883 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1885 #undef TARGET_CAN_CHANGE_MODE_CLASS
1886 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1888 #undef TARGET_CONSTANT_ALIGNMENT
1889 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1891 #undef TARGET_STARTING_FRAME_OFFSET
1892 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1894 #if TARGET_ELF && RS6000_WEAK
1895 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1896 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1899 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1900 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1902 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1903 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1906 /* Processor table. */
/* One entry per supported -mcpu=/-mtune= name; fields of struct rs6000_ptt
   (whose opening line was dropped by extraction — confirm upstream). */
1909 const char *const name; /* Canonical processor name. */
1910 const enum processor_type processor; /* Processor type enum value. */
1911 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
/* X-macro expansion: rs6000-cpus.def invokes RS6000_CPU once per CPU,
   producing one table initializer each. */
1914 static struct rs6000_ptt const processor_target_table[] =
1916 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1917 #include "rs6000-cpus.def"
1921 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
/* Linear scan of processor_target_table by exact (case-sensitive) name. */
1925 rs6000_cpu_name_lookup (const char *name)
1931 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1932 if (! strcmp (name, processor_target_table[i].name))
1940 /* Return number of consecutive hard regs needed starting at reg REGNO
1941 to hold something of mode MODE.
1942 This is ordinarily the length in words of a value of mode MODE
1943 but can be less for certain modes in special long registers.
1945 POWER and PowerPC GPRs hold 32 bits worth;
1946 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1949 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1951 unsigned HOST_WIDE_INT reg_size;
1953 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1954 128-bit floating point that can go in vector registers, which has VSX
1955 memory addressing. */
1956 if (FP_REGNO_P (regno))
1957 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1958 ? UNITS_PER_VSX_WORD
1959 : UNITS_PER_FP_WORD);
1961 else if (ALTIVEC_REGNO_P (regno))
1962 reg_size = UNITS_PER_ALTIVEC_WORD;
/* Anything else (GPRs and special registers) uses the machine word size. */
1965 reg_size = UNITS_PER_WORD;
/* Ceiling division: round the mode size up to whole registers. */
1967 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1970 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1973 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
/* LAST_REGNO is the highest register the value would occupy, from the
   precomputed nregs table.  */
1975   int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
/* Complex modes are validated via their component (inner) mode.  */
1977   if (COMPLEX_MODE_P (mode))
1978     mode = GET_MODE_INNER (mode);
1980   /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1981 register combinations, and use PTImode where we need to deal with quad
1982 word memory operations. Don't allow quad words in the argument or frame
1983 pointer registers, just registers 0..31. */
1984   if (mode == PTImode)
1985     return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1986 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1987 && ((regno & 1) == 0));
1989   /* VSX registers that overlap the FPR registers are larger than for non-VSX
1990 implementations. Don't allow an item to be split between a FP register
1991 and an Altivec register. Allow TImode in all VSX registers if the user
1993   if (TARGET_VSX && VSX_REGNO_P (regno)
1994       && (VECTOR_MEM_VSX_P (mode)
1995 || FLOAT128_VECTOR_P (mode)
1996 || reg_addr[mode].scalar_in_vmx_p
1998 || (TARGET_VADDUQM && mode == V1TImode)))
2000   if (FP_REGNO_P (regno))
2001     return FP_REGNO_P (last_regno);
2003   if (ALTIVEC_REGNO_P (regno))
/* Scalars narrower than a full vector are only allowed in Altivec regs
   when scalar_in_vmx_p says the upper registers may hold them.
   NOTE(review): the then-branch (presumably "return 0;") is elided in
   this listing.  */
2005   if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2008   return ALTIVEC_REGNO_P (last_regno);
2012   /* The GPRs can hold any mode, but values bigger than one register
2013 cannot go past R31. */
2014   if (INT_REGNO_P (regno))
2015     return INT_REGNO_P (last_regno);
2017   /* The float registers (except for VSX vector modes) can only hold floating
2018 modes and DImode. */
2019   if (FP_REGNO_P (regno))
/* NOTE(review): several return statements inside this FP_REGNO_P arm are
   elided; the visible conditions gate FLOAT128 vectors, scalar floats
   (TDmode needs an even FPR), and integer modes of FP-word size, plus
   SImode on power8 and QI/HImode on power9 vector units.  */
2021   if (FLOAT128_VECTOR_P (mode))
2024   if (SCALAR_FLOAT_MODE_P (mode)
2025       && (mode != TDmode || (regno % 2) == 0)
2026       && FP_REGNO_P (last_regno))
2029   if (GET_MODE_CLASS (mode) == MODE_INT)
2031   if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2034   if (TARGET_P8_VECTOR && (mode == SImode))
2037   if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2044   /* The CR register can only hold CC modes. */
2045   if (CR_REGNO_P (regno))
2046     return GET_MODE_CLASS (mode) == MODE_CC;
2048   if (CA_REGNO_P (regno))
2049     return mode == Pmode || mode == SImode;
2051   /* AltiVec only in Altivec registers. */
2052   if (ALTIVEC_REGNO_P (regno))
2053     return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2054 || mode == V1TImode);
2056   /* We cannot put non-VSX TImode or PTImode anywhere except general register
2057 and it must be able to fit within the register set. */
2059   return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2062 /* Implement TARGET_HARD_REGNO_NREGS. */
/* Thin wrapper over the precomputed [mode][regno] register-count table;
   the table is filled in elsewhere (presumably from
   rs6000_hard_regno_nregs_internal -- confirm in the full source).  */
2065 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2067   return rs6000_hard_regno_nregs[mode][regno];
2070 /* Implement TARGET_HARD_REGNO_MODE_OK. */
/* Cached form: reads the table precomputed from
   rs6000_hard_regno_mode_ok_uncached rather than recomputing.  */
2073 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2075   return rs6000_hard_regno_mode_ok_p[mode][regno];
2078 /* Implement TARGET_MODES_TIEABLE_P.
2080 PTImode cannot tie with other modes because PTImode is restricted to even
2081 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2084 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2085 128-bit floating point on VSX systems ties with other vectors. */
2088 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
/* Each category check is symmetric: if MODE1 is in the category, MODE2
   must be too, and vice versa.  NOTE(review): the bodies of the
   "mode2-only" branches (presumably "return false;") and the final
   return are elided in this listing.  */
2090   if (mode1 == PTImode)
2091     return mode2 == PTImode;
2092   if (mode2 == PTImode)
2095   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2096     return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2097   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2100   if (SCALAR_FLOAT_MODE_P (mode1))
2101     return SCALAR_FLOAT_MODE_P (mode2);
2102   if (SCALAR_FLOAT_MODE_P (mode2))
2105   if (GET_MODE_CLASS (mode1) == MODE_CC)
2106     return GET_MODE_CLASS (mode2) == MODE_CC;
2107   if (GET_MODE_CLASS (mode2) == MODE_CC)
2113 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
/* A register is partially clobbered across a call when only part of the
   value it holds survives: the visible conditions cover GPRs holding
   values wider than 4 bytes and FPRs holding values wider than 8 bytes
   that are not IBM 128-bit (2-register) floats.  NOTE(review): the "if"
   headers and return statements are elided in this listing -- the
   visible lines are only the condition tails; confirm the guarding
   flags (likely 32-bit ABI / VSX checks) against the full source.  */
2116 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2117 unsigned int regno, machine_mode mode)
2121     && GET_MODE_SIZE (mode) > 4
2122     && INT_REGNO_P (regno))
2126     && FP_REGNO_P (regno)
2127     && GET_MODE_SIZE (mode) > 8
2128     && !FLOAT128_2REG_P (mode))
2134 /* Print interesting facts about registers. */
/* For each hard register in [FIRST_REGNO, LAST_REGNO], write to stderr
   the modes it can hold (with register counts when > 1), whether it is
   call-used or fixed, its register class, and its number.  The ",\n\t"
   fprintf calls wrap long output lines.  NOTE(review): the line-length
   bookkeeping around LEN and COMMA is partially elided in this
   listing.  */
2136 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2140   for (r = first_regno; r <= last_regno; ++r)
2142   const char *comma = "";
2145   if (first_regno == last_regno)
2146     fprintf (stderr, "%s:\t", reg_name);
2148   fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2151   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2152     if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2156   fprintf (stderr, ",\n\t");
2161   if (rs6000_hard_regno_nregs[m][r] > 1)
2162     len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2163 rs6000_hard_regno_nregs[m][r]);
2165   len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2170   if (call_used_regs[r])
2174   fprintf (stderr, ",\n\t");
2179   len += fprintf (stderr, "%s%s", comma, "call-used");
2187   fprintf (stderr, ",\n\t");
2192   len += fprintf (stderr, "%s%s", comma, "fixed");
2198   fprintf (stderr, ",\n\t");
2202   len += fprintf (stderr, "%sreg-class = %s", comma,
2203 reg_class_names[(int)rs6000_regno_regclass[r]]);
2208   fprintf (stderr, ",\n\t");
2212   fprintf (stderr, "%sregno = %d\n", comma, r);
/* Map an rs6000_vector enumerator to a human-readable name for the
   -mdebug=reg dumps.  NOTE(review): the function header comment, the
   switch keyword, and the return of RET are elided in this listing.  */
2217 rs6000_debug_vector_unit (enum rs6000_vector v)
2223     case VECTOR_NONE: ret = "none"; break;
2224     case VECTOR_ALTIVEC: ret = "altivec"; break;
2225     case VECTOR_VSX: ret = "vsx"; break;
2226     case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2227     default: ret = "unknown"; break;
2233 /* Inner function printing just the address mask for a particular reload
2235 DEBUG_FUNCTION char *
2236 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
/* Build a fixed-width textual rendering of MASK, one character position
   per RELOAD_REG_* capability bit; when KEEP_SPACES a blank is emitted
   for each absent bit so columns line up across rows.  NOTE(review):
   the then-branches that append the actual characters (and the buffer
   declaration/return) are elided in this listing -- only the bit tests
   remain visible.  */
2241   if ((mask & RELOAD_REG_VALID) != 0)
2243   else if (keep_spaces)
2246   if ((mask & RELOAD_REG_MULTIPLE) != 0)
2248   else if (keep_spaces)
2251   if ((mask & RELOAD_REG_INDEXED) != 0)
2253   else if (keep_spaces)
2256   if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2258   else if ((mask & RELOAD_REG_OFFSET) != 0)
2260   else if (keep_spaces)
2263   if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2265   else if (keep_spaces)
2268   if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2270   else if (keep_spaces)
2273   if ((mask & RELOAD_REG_AND_M16) != 0)
2275   else if (keep_spaces)
2283 /* Print the address masks in a human readble fashion. */
/* Dump one -mdebug=reg line for machine mode M: the per-reload-class
   address masks, whether reload store/load handlers are registered,
   whether scalars may live in the upper (Altivec) VSX registers, and
   which vector units handle arithmetic and memory for the mode.
   SPACES accumulates padding so optional fields stay column-aligned.  */
2285 rs6000_debug_print_mode (ssize_t m)
2290   fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2291   for (rc = 0; rc < N_RELOAD_REG; rc++)
2292     fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2293 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2295   if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2296       || (reg_addr[m].reload_load != CODE_FOR_nothing))
2298   fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2299 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2300 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2304   spaces += sizeof (" Reload=sl") - 1;
2306   if (reg_addr[m].scalar_in_vmx_p)
2308   fprintf (stderr, "%*s Upper=y", spaces, "");
2312   spaces += sizeof (" Upper=y") - 1;
2314   if (rs6000_vector_unit[m] != VECTOR_NONE
2315       || rs6000_vector_mem[m] != VECTOR_NONE)
2317   fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2319 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2320 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2323   fputs ("\n", stderr);
/* printf formats shared by the -mdebug=reg dump routines: a
   left-justified 32-character key followed by "= " and then a decimal
   value (D), a zero-padded hex HOST_WIDE_INT (WX), or a string (S).  */
2326 #define DEBUG_FMT_ID "%-32s= "
2327 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2328 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2329 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2331 /* Print various interesting information with -mdebug=reg. */
/* Top-level -mdebug=reg dump: hard-register capabilities, virtual
   register numbers, constraint register classes, per-mode address
   masks, mode tieability, reciprocal-estimate settings, and the
   resolved -mcpu/-mtune/ABI/scheduling options.  Everything goes to
   stderr.  NOTE(review): this listing is elided -- declarations of
   several locals (m, m1, m2, v, flags, options, nop_num), various
   braces, "break;" lines, and some fprintf headers are missing.  */
2333 rs6000_debug_reg_global (void)
2335   static const char *const tf[2] = { "false", "true" };
2336   const char *nl = (const char *)0;
2339   char costly_num[20];
2341   char flags_buffer[40];
2342   const char *costly_str;
2343   const char *nop_str;
2344   const char *trace_str;
2345   const char *abi_str;
2346   const char *cmodel_str;
2347   struct cl_target_option cl_opts;
2349   /* Modes we want tieable information on. */
2350   static const machine_mode print_tieable_modes[] = {
2384   /* Virtual regs we are interested in. */
2385   const static struct {
2386     int regno; /* register number. */
2387     const char *name; /* register name. */
2388   } virtual_regs[] = {
2389     { STACK_POINTER_REGNUM, "stack pointer:" },
2390     { TOC_REGNUM, "toc: " },
2391     { STATIC_CHAIN_REGNUM, "static chain: " },
2392     { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2393     { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2394     { ARG_POINTER_REGNUM, "arg pointer: " },
2395     { FRAME_POINTER_REGNUM, "frame pointer:" },
2396     { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2397     { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2398     { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2399     { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2400     { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2401     { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2402     { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
/* NOTE(review): "boundry" is a typo, but it is part of the emitted
   label string; fixing it would change the debug output.  */
2403     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2404     { LAST_VIRTUAL_REGISTER, "last virtual: " },
/* Section 1: per-register-file capability dump via
   rs6000_debug_reg_print.  */
2407   fputs ("\nHard register information:\n", stderr);
2408   rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2409   rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2410   rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2413   rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2414   rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2415   rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2416   rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2417   rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2418   rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
/* Section 2: the virtual/frame register numbers defined above.  */
2420   fputs ("\nVirtual/stack/frame registers:\n", stderr);
2421   for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2422     fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
/* Section 3: register class chosen for each machine-dependent
   constraint letter (the fprintf header line is elided here).  */
2426 "d reg_class = %s\n"
2427 "f reg_class = %s\n"
2428 "v reg_class = %s\n"
2429 "wa reg_class = %s\n"
2430 "we reg_class = %s\n"
2431 "wr reg_class = %s\n"
2432 "wx reg_class = %s\n"
2433 "wA reg_class = %s\n"
2435 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2436 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2437 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2438 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2439 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2441 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2442 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
/* Section 4: address masks and reload info for every mode.  */
2445   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2446     rs6000_debug_print_mode (m);
2448   fputs ("\n", stderr);
/* Section 5: which of the interesting modes tie with each other,
   using the real rs6000_modes_tieable_p hook.  */
2450   for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2452   machine_mode mode1 = print_tieable_modes[m1];
2453   bool first_time = true;
2455   nl = (const char *)0;
2456   for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2458   machine_mode mode2 = print_tieable_modes[m2];
2459   if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2463   fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2468   fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2473   fputs ("\n", stderr);
/* Section 6: reciprocal-estimate (fre/frsqrte) settings per mode.  */
2479   if (rs6000_recip_control)
2481   fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2483   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2484     if (rs6000_recip_bits[m])
2487 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2489 (RS6000_RECIP_AUTO_RE_P (m)
2491 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2492 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2494 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2497   fputs ("\n", stderr);
/* Section 7: resolved -mcpu / -mtune names and their flag sets.  */
2500   if (rs6000_cpu_index >= 0)
2502   const char *name = processor_target_table[rs6000_cpu_index].name;
2504 = processor_target_table[rs6000_cpu_index].target_enable;
2506   sprintf (flags_buffer, "-mcpu=%s flags", name);
2507   rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2510   fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2512   if (rs6000_tune_index >= 0)
2514   const char *name = processor_target_table[rs6000_tune_index].name;
2516 = processor_target_table[rs6000_tune_index].target_enable;
2518   sprintf (flags_buffer, "-mtune=%s flags", name);
2519   rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2522   fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
/* Section 8: ISA flags, builtin mask, and configure-time defaults.  */
2524   cl_target_option_save (&cl_opts, &global_options);
2525   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2528   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2529 rs6000_isa_flags_explicit);
2531   rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2532 rs6000_builtin_mask);
2534   rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2536   fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2537 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
/* Section 9: scheduler, sdata, traceback, cmodel and ABI settings,
   rendered as strings (unknown enum values fall back to the numeric
   form via costly_num / nop_num).  */
2539   switch (rs6000_sched_costly_dep)
2541     case max_dep_latency:
2542       costly_str = "max_dep_latency";
2546       costly_str = "no_dep_costly";
2549     case all_deps_costly:
2550       costly_str = "all_deps_costly";
2553     case true_store_to_load_dep_costly:
2554       costly_str = "true_store_to_load_dep_costly";
2557     case store_to_load_dep_costly:
2558       costly_str = "store_to_load_dep_costly";
2562       costly_str = costly_num;
2563       sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2567   fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2569   switch (rs6000_sched_insert_nops)
2571     case sched_finish_regroup_exact:
2572       nop_str = "sched_finish_regroup_exact";
2575     case sched_finish_pad_groups:
2576       nop_str = "sched_finish_pad_groups";
2579     case sched_finish_none:
2580       nop_str = "sched_finish_none";
2585       sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2589   fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2591   switch (rs6000_sdata)
2598   fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2602   fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2606   fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2611   switch (rs6000_traceback)
2613     case traceback_default: trace_str = "default"; break;
2614     case traceback_none: trace_str = "none"; break;
2615     case traceback_part: trace_str = "part"; break;
2616     case traceback_full: trace_str = "full"; break;
2617     default: trace_str = "unknown"; break;
2620   fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2622   switch (rs6000_current_cmodel)
2624     case CMODEL_SMALL: cmodel_str = "small"; break;
2625     case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2626     case CMODEL_LARGE: cmodel_str = "large"; break;
2627     default: cmodel_str = "unknown"; break;
2630   fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2632   switch (rs6000_current_abi)
2634     case ABI_NONE: abi_str = "none"; break;
2635     case ABI_AIX: abi_str = "aix"; break;
2636     case ABI_ELFv2: abi_str = "ELFv2"; break;
2637     case ABI_V4: abi_str = "V4"; break;
2638     case ABI_DARWIN: abi_str = "darwin"; break;
2639     default: abi_str = "unknown"; break;
2642   fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
/* Section 10: miscellaneous boolean/numeric option dump.  */
2644   if (rs6000_altivec_abi)
2645     fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2647   if (rs6000_darwin64_abi)
2648     fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2650   fprintf (stderr, DEBUG_FMT_S, "soft_float",
2651 (TARGET_SOFT_FLOAT ? "true" : "false"));
2653   if (TARGET_LINK_STACK)
2654     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2656   if (TARGET_P8_FUSION)
2660   strcpy (options, "power8");
2661   if (TARGET_P8_FUSION_SIGN)
2662     strcat (options, ", sign");
2664   fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2667   fprintf (stderr, DEBUG_FMT_S, "plt-format",
2668 TARGET_SECURE_PLT ? "secure" : "bss");
2669   fprintf (stderr, DEBUG_FMT_S, "struct-return",
2670 aix_struct_return ? "aix" : "sysv");
2671   fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2672   fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2673   fprintf (stderr, DEBUG_FMT_S, "align_branch",
2674 tf[!!rs6000_align_branch_targets]);
2675   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2676   fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2677 rs6000_long_double_type_size);
2678   if (rs6000_long_double_type_size > 64)
2680   fprintf (stderr, DEBUG_FMT_S, "long double type",
2681 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2682   fprintf (stderr, DEBUG_FMT_S, "default long double type",
2683 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2685   fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2686 (int)rs6000_sched_restricted_insns_priority);
2687   fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2689   fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2690 (int)RS6000_BUILTIN_COUNT);
2692   fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2693 (int)TARGET_FLOAT128_ENABLE_TYPE);
2696   fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2697 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2699   if (TARGET_DIRECT_MOVE_128)
2700     fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2701 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2705 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2706 legitimate address support to figure out the appropriate addressing to
/* For every machine mode and every reload register class (GPR/FPR/VMX),
   compute which addressing forms are legal (valid, indexed, offset,
   quad-offset, pre-inc/dec, pre-modify, AND-masked) and store the
   result in reg_addr[m].addr_mask[rc]; RELOAD_REG_ANY gets the union.
   NOTE(review): this listing is elided -- some initializations (e.g.
   any_addr_mask/addr_mask resets, msize declaration) and a few
   condition heads are missing.  */
2710 rs6000_setup_reg_addr_masks (void)
2712   ssize_t rc, reg, m, nregs;
2713   addr_mask_type any_addr_mask, addr_mask;
2715   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2717   machine_mode m2 = (machine_mode) m;
2718   bool complex_p = false;
2719   bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
/* Complex modes are analyzed through their component mode.  */
2722   if (COMPLEX_MODE_P (m2))
2725   m2 = GET_MODE_INNER (m2);
2728   msize = GET_MODE_SIZE (m2);
2730   /* SDmode is special in that we want to access it only via REG+REG
2731 addressing on power7 and above, since we want to use the LFIWZX and
2732 STFIWZX instructions to load it. */
2733   bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2736   for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2739   reg = reload_reg_map[rc].reg;
2741   /* Can mode values go in the GPR/FPR/Altivec registers? */
2742   if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2744   bool small_int_vsx_p = (small_int_p
2745 && (rc == RELOAD_REG_FPR
2746 || rc == RELOAD_REG_VMX));
2748   nregs = rs6000_hard_regno_nregs[m][reg];
2749   addr_mask |= RELOAD_REG_VALID;
2751   /* Indicate if the mode takes more than 1 physical register. If
2752 it takes a single register, indicate it can do REG+REG
2753 addressing. Small integers in VSX registers can only do
2754 REG+REG addressing. */
2755   if (small_int_vsx_p)
2756     addr_mask |= RELOAD_REG_INDEXED;
2757   else if (nregs > 1 || m == BLKmode || complex_p)
2758     addr_mask |= RELOAD_REG_MULTIPLE;
2760   addr_mask |= RELOAD_REG_INDEXED;
2762   /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2763 addressing. If we allow scalars into Altivec registers,
2764 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2766 For VSX systems, we don't allow update addressing for
2767 DFmode/SFmode if those registers can go in both the
2768 traditional floating point registers and Altivec registers.
2769 The load/store instructions for the Altivec registers do not
2770 have update forms. If we allowed update addressing, it seems
2771 to break IV-OPT code using floating point if the index type is
2772 int instead of long (PR target/81550 and target/84042). */
/* NOTE(review): the head of this condition (likely a TARGET_UPDATE
   test) is elided; only the tail conjuncts are visible.  */
2775     && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2777     && !VECTOR_MODE_P (m2)
2778     && !FLOAT128_VECTOR_P (m2)
2780     && (m != E_DFmode || !TARGET_VSX)
2781     && (m != E_SFmode || !TARGET_P8_VECTOR)
2782     && !small_int_vsx_p)
2784   addr_mask |= RELOAD_REG_PRE_INCDEC;
2786   /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2787 we don't allow PRE_MODIFY for some multi-register
/* NOTE(review): the mode dispatch (likely a switch on SImode/DImode/
   SF-DF modes) around these three PRE_MODIFY grants is elided.  */
2792   addr_mask |= RELOAD_REG_PRE_MODIFY;
2796   if (TARGET_POWERPC64)
2797     addr_mask |= RELOAD_REG_PRE_MODIFY;
2802   if (TARGET_HARD_FLOAT)
2803     addr_mask |= RELOAD_REG_PRE_MODIFY;
2809   /* GPR and FPR registers can do REG+OFFSET addressing, except
2810 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2811 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2812   if ((addr_mask != 0) && !indexed_only_p
2814       && (rc == RELOAD_REG_GPR
2815 || ((msize == 8 || m2 == SFmode)
2816 && (rc == RELOAD_REG_FPR
2817 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2818     addr_mask |= RELOAD_REG_OFFSET;
2820   /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2821 instructions are enabled. The offset for 128-bit VSX registers is
2822 only 12-bits. While GPRs can handle the full offset range, VSX
2823 registers can only handle the restricted range. */
2824   else if ((addr_mask != 0) && !indexed_only_p
2825 && msize == 16 && TARGET_P9_VECTOR
2826 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2827 || (m2 == TImode && TARGET_VSX)))
2829   addr_mask |= RELOAD_REG_OFFSET;
/* FPR/VMX quad offsets use the restricted DQ-form (16-byte aligned)
   encoding, tracked separately from plain offsets.  */
2830   if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2831     addr_mask |= RELOAD_REG_QUAD_OFFSET;
2834   /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2835 addressing on 128-bit types. */
2836   if (rc == RELOAD_REG_VMX && msize == 16
2837       && (addr_mask & RELOAD_REG_VALID) != 0)
2838     addr_mask |= RELOAD_REG_AND_M16;
2840   reg_addr[m].addr_mask[rc] = addr_mask;
2841   any_addr_mask |= addr_mask;
/* RELOAD_REG_ANY is the union of what any single class can do.  */
2844   reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2849 /* Initialize the various global tables that are based on register size. */
2851 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2857 /* Precalculate REGNO_REG_CLASS. */
2858 rs6000_regno_regclass[0] = GENERAL_REGS;
2859 for (r = 1; r < 32; ++r)
2860 rs6000_regno_regclass[r] = BASE_REGS;
2862 for (r = 32; r < 64; ++r)
2863 rs6000_regno_regclass[r] = FLOAT_REGS;
2865 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2866 rs6000_regno_regclass[r] = NO_REGS;
2868 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2869 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2871 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2872 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2873 rs6000_regno_regclass[r] = CR_REGS;
2875 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2876 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2877 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2878 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2879 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2880 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2881 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2883 /* Precalculate register class to simpler reload register class. We don't
2884 need all of the register classes that are combinations of different
2885 classes, just the simple ones that have constraint letters. */
2886 for (c = 0; c < N_REG_CLASSES; c++)
2887 reg_class_to_reg_type[c] = NO_REG_TYPE;
2889 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2890 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2891 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2892 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2893 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2894 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2895 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2897 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2898 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2902 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2903 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2907 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2908 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2911 /* Precalculate the valid memory formats as well as the vector information,
2912 this must be set up before the rs6000_hard_regno_nregs_internal calls
2914 gcc_assert ((int)VECTOR_NONE == 0);
2915 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2916 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2918 gcc_assert ((int)CODE_FOR_nothing == 0);
2919 memset ((void *) ®_addr[0], '\0', sizeof (reg_addr));
2921 gcc_assert ((int)NO_REGS == 0);
2922 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2924 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2925 believes it can use native alignment or still uses 128-bit alignment. */
2926 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2937 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2938 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2939 if (TARGET_FLOAT128_TYPE)
2941 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2942 rs6000_vector_align[KFmode] = 128;
2944 if (FLOAT128_IEEE_P (TFmode))
2946 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2947 rs6000_vector_align[TFmode] = 128;
2951 /* V2DF mode, VSX only. */
2954 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2955 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2956 rs6000_vector_align[V2DFmode] = align64;
2959 /* V4SF mode, either VSX or Altivec. */
2962 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2963 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2964 rs6000_vector_align[V4SFmode] = align32;
2966 else if (TARGET_ALTIVEC)
2968 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2969 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2970 rs6000_vector_align[V4SFmode] = align32;
2973 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2977 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2978 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2979 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2980 rs6000_vector_align[V4SImode] = align32;
2981 rs6000_vector_align[V8HImode] = align32;
2982 rs6000_vector_align[V16QImode] = align32;
2986 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2987 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2988 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2992 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2993 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2994 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2998 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2999 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3002 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3003 rs6000_vector_unit[V2DImode]
3004 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3005 rs6000_vector_align[V2DImode] = align64;
3007 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3008 rs6000_vector_unit[V1TImode]
3009 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3010 rs6000_vector_align[V1TImode] = 128;
3013 /* DFmode, see if we want to use the VSX unit. Memory is handled
3014 differently, so don't set rs6000_vector_mem. */
3017 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3018 rs6000_vector_align[DFmode] = 64;
3021 /* SFmode, see if we want to use the VSX unit. */
3022 if (TARGET_P8_VECTOR)
3024 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3025 rs6000_vector_align[SFmode] = 32;
3028 /* Allow TImode in VSX register and set the VSX memory macros. */
3031 rs6000_vector_mem[TImode] = VECTOR_VSX;
3032 rs6000_vector_align[TImode] = align64;
3035 /* Register class constraints for the constraints that depend on compile
3036 switches. When the VSX code was added, different constraints were added
3037 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3038 of the VSX registers are used. The register classes for scalar floating
3039 point types is set, based on whether we allow that type into the upper
3040 (Altivec) registers. GCC has register classes to target the Altivec
3041 registers for load/store operations, to select using a VSX memory
3042 operation instead of the traditional floating point operation. The
3045 d - Register class to use with traditional DFmode instructions.
3046 f - Register class to use with traditional SFmode instructions.
3047 v - Altivec register.
3048 wa - Any VSX register.
3049 wc - Reserved to represent individual CR bits (used in LLVM).
3050 wn - always NO_REGS.
3051 wr - GPR if 64-bit mode is permitted.
3052 wx - Float register if we can do 32-bit int stores. */
3054 if (TARGET_HARD_FLOAT)
3056 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3057 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3061 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3063 /* Add conditional constraints based on various options, to allow us to
3064 collapse multiple insn patterns. */
3066 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3068 if (TARGET_POWERPC64)
3070 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3071 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3075 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3077 /* Support for new direct moves (ISA 3.0 + 64bit). */
3078 if (TARGET_DIRECT_MOVE_128)
3079 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3081 /* Set up the reload helper and direct move functions. */
3082 if (TARGET_VSX || TARGET_ALTIVEC)
3086 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3087 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3088 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3089 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3090 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3091 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3092 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3093 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3094 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3095 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3096 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3097 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3098 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3099 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3100 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3101 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3102 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3103 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3104 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3105 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3107 if (FLOAT128_VECTOR_P (KFmode))
3109 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3110 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3113 if (FLOAT128_VECTOR_P (TFmode))
3115 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3116 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3119 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3121 if (TARGET_NO_SDMODE_STACK)
3123 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3124 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3129 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3130 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3133 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3135 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3136 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3137 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3138 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3139 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3140 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3141 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3142 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3143 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3145 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3146 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3147 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3148 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3149 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3150 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3151 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3152 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3153 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3155 if (FLOAT128_VECTOR_P (KFmode))
3157 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3158 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3161 if (FLOAT128_VECTOR_P (TFmode))
3163 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3164 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3170 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3171 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3172 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3173 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3174 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3175 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3176 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3177 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3178 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3179 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3180 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3181 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3182 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3183 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3184 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3185 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3186 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3187 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3188 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3189 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3191 if (FLOAT128_VECTOR_P (KFmode))
3193 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3194 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3197 if (FLOAT128_IEEE_P (TFmode))
3199 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3200 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3203 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3205 if (TARGET_NO_SDMODE_STACK)
3207 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3208 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3213 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3214 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3217 if (TARGET_DIRECT_MOVE)
3219 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3220 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3221 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3225 reg_addr[DFmode].scalar_in_vmx_p = true;
3226 reg_addr[DImode].scalar_in_vmx_p = true;
3228 if (TARGET_P8_VECTOR)
3230 reg_addr[SFmode].scalar_in_vmx_p = true;
3231 reg_addr[SImode].scalar_in_vmx_p = true;
3233 if (TARGET_P9_VECTOR)
3235 reg_addr[HImode].scalar_in_vmx_p = true;
3236 reg_addr[QImode].scalar_in_vmx_p = true;
3241 /* Precalculate HARD_REGNO_NREGS. */
3242 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3243 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3244 rs6000_hard_regno_nregs[m][r]
3245 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3247 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3248 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3249 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3250 rs6000_hard_regno_mode_ok_p[m][r]
3251 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3253 /* Precalculate CLASS_MAX_NREGS sizes. */
3254 for (c = 0; c < LIM_REG_CLASSES; ++c)
3258 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3259 reg_size = UNITS_PER_VSX_WORD;
3261 else if (c == ALTIVEC_REGS)
3262 reg_size = UNITS_PER_ALTIVEC_WORD;
3264 else if (c == FLOAT_REGS)
3265 reg_size = UNITS_PER_FP_WORD;
3268 reg_size = UNITS_PER_WORD;
3270 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3272 machine_mode m2 = (machine_mode)m;
3273 int reg_size2 = reg_size;
3275 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3277 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3278 reg_size2 = UNITS_PER_FP_WORD;
3280 rs6000_class_max_nregs[m][c]
3281 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3285 /* Calculate which modes to automatically generate code to use the
3286 reciprocal divide and square root instructions. In the future, possibly
3287 automatically generate the instructions even if the user did not specify
3288 -mrecip. The older machines double precision reciprocal sqrt estimate is
3289 not accurate enough. */
3290 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3292 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3294 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3295 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3296 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3297 if (VECTOR_UNIT_VSX_P (V2DFmode))
3298 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3300 if (TARGET_FRSQRTES)
3301 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3303 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3304 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3305 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3306 if (VECTOR_UNIT_VSX_P (V2DFmode))
3307 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3309 if (rs6000_recip_control)
3311 if (!flag_finite_math_only)
3312 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3314 if (flag_trapping_math)
3315 warning (0, "%qs requires %qs or %qs", "-mrecip",
3316 "-fno-trapping-math", "-ffast-math");
3317 if (!flag_reciprocal_math)
3318 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3320 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3322 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3323 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3324 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3326 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3327 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3328 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3330 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3331 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3332 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3334 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3335 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3336 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3338 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3339 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3340 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3342 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3343 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3344 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3346 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3347 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3348 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3350 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3351 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3352 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3356 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3357 legitimate address support to figure out the appropriate addressing to
3359 rs6000_setup_reg_addr_masks ();
3361 if (global_init_p || TARGET_DEBUG_TARGET)
3363 if (TARGET_DEBUG_REG)
3364 rs6000_debug_reg_global ();
3366 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3368 "SImode variable mult cost = %d\n"
3369 "SImode constant mult cost = %d\n"
3370 "SImode short constant mult cost = %d\n"
3371 "DImode multipliciation cost = %d\n"
3372 "SImode division cost = %d\n"
3373 "DImode division cost = %d\n"
3374 "Simple fp operation cost = %d\n"
3375 "DFmode multiplication cost = %d\n"
3376 "SFmode division cost = %d\n"
3377 "DFmode division cost = %d\n"
3378 "cache line size = %d\n"
3379 "l1 cache size = %d\n"
3380 "l2 cache size = %d\n"
3381 "simultaneous prefetches = %d\n"
3384 rs6000_cost->mulsi_const,
3385 rs6000_cost->mulsi_const9,
3393 rs6000_cost->cache_line_size,
3394 rs6000_cost->l1_cache_size,
3395 rs6000_cost->l2_cache_size,
3396 rs6000_cost->simultaneous_prefetches);
3401 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  Forces the option
     settings the Darwin (Mac OS) ABI requires and applies the kernel/kext
     special cases.  NOTE(review): several brace and condition lines of this
     function are not visible in this excerpt.  */
3404 darwin_rs6000_override_options (void)
3406 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3408 rs6000_altivec_abi = 1;
3409 TARGET_ALTIVEC_VRSAVE = 1;
3410 rs6000_current_abi = ABI_DARWIN;
     /* Darwin uses one-byte bools in some configurations — presumably gated
        on an ABI/OS-version condition that is truncated here; confirm against
        the full source.  */
3412 if (DEFAULT_ABI == ABI_DARWIN
3414 darwin_one_byte_bool = 1;
     /* -m64 without PowerPC64 support makes no sense: force PowerPC64 on and
        tell the user about it.  */
3416 if (TARGET_64BIT && ! TARGET_POWERPC64)
3418 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3419 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3422 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3423 optimisation, and will not work with the most generic case (where the
3424 symbol is undefined external, but there is no symbol stub).  */
3426 rs6000_default_long_calls = 0;
3428 /* ld_classic is (so far) still used for kernel (static) code, and supports
3429 the JBSR longcall / branch islands.  */
3432 rs6000_default_long_calls = 1;
3434 /* Allow a kext author to do -mkernel -mhard-float.  */
3435 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3436 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3439 /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
3441 if (!flag_mkernel && !flag_apple_kext
3443 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3444 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3446 /* Unless the user (not the configurer) has explicitly overridden
3447 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3448 G4 unless targeting the kernel.  */
3451 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3452 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3453 && ! global_options_set.x_rs6000_cpu_index)
3455 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3460 /* If not otherwise specified by a target, make 'long double' equivalent to
3463 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3464 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3467 /* Return the builtin mask of the various options used that could affect which
3468 builtins were used. In the past we used target_flags, but we've run out of
3469 bits, and some options are no longer in target_flags. */
3472 rs6000_builtin_mask_calculate (void)
3474 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3475 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3476 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3477 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3478 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3479 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3480 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3481 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3482 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3483 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3484 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3485 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3486 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3487 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3488 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3489 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3490 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3491 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3492 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3493 | ((TARGET_LONG_DOUBLE_128
3494 && TARGET_HARD_FLOAT
3495 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3496 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3497 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3500 /* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
3501 to clobber the XER[CA] bit because clobbering that bit without telling
3502 the compiler worked just fine with versions of GCC before GCC 5, and
3503 breaking a lot of older code in ways that are hard to track down is
3504 not such a great idea.  */
3507 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3508 vec<const char *> &/*constraints*/,
3509 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
     /* Add the carry bit (XER[CA], as an SImode hard reg) to the asm's
        clobber list...  */
3511 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
     /* ...and mark it in the clobbered hard-register set so the register
        allocator knows it is dead across the asm.  (The function's return
        statement is outside this excerpt.)  */
3512 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3516 /* Override command line options.
3518 Combine build-specific configuration information with options
3519 specified on the command line to set various state variables which
3520 influence code generation, optimization, and expansion of built-in
3521 functions. Assure that command-line configuration preferences are
3522 compatible with each other and with the build configuration; issue
3523 warnings while adjusting configuration or error messages while
3524 rejecting configuration.
3526 Upon entry to this function:
3528 This function is called once at the beginning of
3529 compilation, and then again at the start and end of compiling
3530 each section of code that has a different configuration, as
3531 indicated, for example, by adding the
3533 __attribute__((__target__("cpu=power9")))
3535 qualifier to a function definition or, for example, by bracketing
3538 #pragma GCC target("altivec")
3542 #pragma GCC reset_options
3544 directives. Parameter global_init_p is true for the initial
3545 invocation, which initializes global variables, and false for all
3546 subsequent invocations.
3549 Various global state information is assumed to be valid. This
3550 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3551 default CPU specified at build configure time, TARGET_DEFAULT,
3552 representing the default set of option flags for the default
3553 target, and global_options_set.x_rs6000_isa_flags, representing
3554 which options were requested on the command line.
3556 Upon return from this function:
3558 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3559 was set by name on the command line. Additionally, if certain
3560 attributes are automatically enabled or disabled by this function
3561 in order to assure compatibility between options and
3562 configuration, the flags associated with those attributes are
3563 also set. By setting these "explicit bits", we avoid the risk
3564 that other code might accidentally overwrite these particular
3565 attributes with "default values".
3567 The various bits of rs6000_isa_flags are set to indicate the
3568 target options that have been selected for the most current
3569 compilation efforts. This has the effect of also turning on the
3570 associated TARGET_XXX values since these are macros which are
3571 generally defined to test the corresponding bit of the
3572 rs6000_isa_flags variable.
3574 The variable rs6000_builtin_mask is set to represent the target
3575 options for the most current compilation efforts, consistent with
3576 the current contents of rs6000_isa_flags. This variable controls
3577 expansion of built-in functions.
3579 Various other global variables and fields of global structures
3580 (over 50 in all) are initialized to reflect the desired options
3581 for the most current compilation efforts. */
3584 rs6000_option_override_internal (bool global_init_p)
3588 HOST_WIDE_INT set_masks;
3589 HOST_WIDE_INT ignore_masks;
3592 struct cl_target_option *main_target_opt
3593 = ((global_init_p || target_option_default_node == NULL)
3594 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3596 /* Print defaults. */
3597 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3598 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3600 /* Remember the explicit arguments. */
3602 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3604 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3605 library functions, so warn about it. The flag may be useful for
3606 performance studies from time to time though, so don't disable it
3608 if (global_options_set.x_rs6000_alignment_flags
3609 && rs6000_alignment_flags == MASK_ALIGN_POWER
3610 && DEFAULT_ABI == ABI_DARWIN
3612 warning (0, "%qs is not supported for 64-bit Darwin;"
3613 " it is incompatible with the installed C and C++ libraries",
3616 /* Numerous experiments show that IRA based loop pressure
3617 calculation works better for RTL loop invariant motion on targets
3618 with enough (>= 32) registers. It is an expensive optimization.
3619 So it is on only for peak performance. */
3620 if (optimize >= 3 && global_init_p
3621 && !global_options_set.x_flag_ira_loop_pressure)
3622 flag_ira_loop_pressure = 1;
3624 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3625 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3626 options were already specified. */
3627 if (flag_sanitize & SANITIZE_USER_ADDRESS
3628 && !global_options_set.x_flag_asynchronous_unwind_tables)
3629 flag_asynchronous_unwind_tables = 1;
3631 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3632 loop unroller is active. It is only checked during unrolling, so
3633 we can just set it on by default. */
3634 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3635 flag_variable_expansion_in_unroller = 1;
3637 /* Set the pointer size. */
3640 rs6000_pmode = DImode;
3641 rs6000_pointer_size = 64;
3645 rs6000_pmode = SImode;
3646 rs6000_pointer_size = 32;
3649 /* Some OSs don't support saving the high part of 64-bit registers on context
3650 switch. Other OSs don't support saving Altivec registers. On those OSs,
3651 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3652 if the user wants either, the user must explicitly specify them and we
3653 won't interfere with the user's specification. */
3655 set_masks = POWERPC_MASKS;
3656 #ifdef OS_MISSING_POWERPC64
3657 if (OS_MISSING_POWERPC64)
3658 set_masks &= ~OPTION_MASK_POWERPC64;
3660 #ifdef OS_MISSING_ALTIVEC
3661 if (OS_MISSING_ALTIVEC)
3662 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3663 | OTHER_VSX_VECTOR_MASKS);
3666 /* Don't override by the processor default if given explicitly. */
3667 set_masks &= ~rs6000_isa_flags_explicit;
3669 if (global_init_p && rs6000_dejagnu_cpu_index >= 0)
3670 rs6000_cpu_index = rs6000_dejagnu_cpu_index;
3672 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3673 the cpu in a target attribute or pragma, but did not specify a tuning
3674 option, use the cpu for the tuning option rather than the option specified
3675 with -mtune on the command line. Process a '--with-cpu' configuration
3676 request as an implicit --cpu. */
3677 if (rs6000_cpu_index >= 0)
3678 cpu_index = rs6000_cpu_index;
3679 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3680 cpu_index = main_target_opt->x_rs6000_cpu_index;
3681 else if (OPTION_TARGET_CPU_DEFAULT)
3682 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3684 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3685 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3686 with those from the cpu, except for options that were explicitly set. If
3687 we don't have a cpu, do not override the target bits set in
3691 rs6000_cpu_index = cpu_index;
3692 rs6000_isa_flags &= ~set_masks;
3693 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3698 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3699 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3700 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3701 to using rs6000_isa_flags, we need to do the initialization here.
3703 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3704 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3705 HOST_WIDE_INT flags;
3707 flags = TARGET_DEFAULT;
3710 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3711 const char *default_cpu = (!TARGET_POWERPC64
3716 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3717 flags = processor_target_table[default_cpu_index].target_enable;
3719 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3722 if (rs6000_tune_index >= 0)
3723 tune_index = rs6000_tune_index;
3724 else if (cpu_index >= 0)
3725 rs6000_tune_index = tune_index = cpu_index;
3729 enum processor_type tune_proc
3730 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3733 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3734 if (processor_target_table[i].processor == tune_proc)
3742 rs6000_cpu = processor_target_table[cpu_index].processor;
3744 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3746 gcc_assert (tune_index >= 0);
3747 rs6000_tune = processor_target_table[tune_index].processor;
3749 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3750 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3751 || rs6000_cpu == PROCESSOR_PPCE5500)
3754 error ("AltiVec not supported in this target");
3757 /* If we are optimizing big endian systems for space, use the load/store
3758 multiple instructions. */
3759 if (BYTES_BIG_ENDIAN && optimize_size)
3760 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3762 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3763 because the hardware doesn't support the instructions used in little
3764 endian mode, and causes an alignment trap. The 750 does not cause an
3765 alignment trap (except when the target is unaligned). */
3767 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3769 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3770 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3771 warning (0, "%qs is not supported on little endian systems",
3775 /* If little-endian, default to -mstrict-align on older processors.
3776 Testing for htm matches power8 and later. */
3777 if (!BYTES_BIG_ENDIAN
3778 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3779 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3781 if (!rs6000_fold_gimple)
3783 "gimple folding of rs6000 builtins has been disabled.\n");
3785 /* Add some warnings for VSX. */
3788 const char *msg = NULL;
3789 if (!TARGET_HARD_FLOAT)
3791 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3792 msg = N_("%<-mvsx%> requires hardware floating point");
3795 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3796 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3799 else if (TARGET_AVOID_XFORM > 0)
3800 msg = N_("%<-mvsx%> needs indexed addressing");
3801 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3802 & OPTION_MASK_ALTIVEC))
3804 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3805 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3807 msg = N_("%<-mno-altivec%> disables vsx");
3813 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3814 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3818 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3819 the -mcpu setting to enable options that conflict. */
3820 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3821 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3822 | OPTION_MASK_ALTIVEC
3823 | OPTION_MASK_VSX)) != 0)
3824 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3825 | OPTION_MASK_DIRECT_MOVE)
3826 & ~rs6000_isa_flags_explicit);
3828 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3829 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3831 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3832 off all of the options that depend on those flags. */
3833 ignore_masks = rs6000_disable_incompatible_switches ();
3835 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3836 unless the user explicitly used the -mno-<option> to disable the code. */
3837 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3838 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3839 else if (TARGET_P9_MINMAX)
3843 if (cpu_index == PROCESSOR_POWER9)
3845 /* legacy behavior: allow -mcpu=power9 with certain
3846 capabilities explicitly disabled. */
3847 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3850 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3851 "for <xxx> less than power9", "-mcpu");
3853 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3854 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3855 & rs6000_isa_flags_explicit))
3856 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3857 were explicitly cleared. */
3858 error ("%qs incompatible with explicitly disabled options",
3861 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3863 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3864 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3865 else if (TARGET_VSX)
3866 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3867 else if (TARGET_POPCNTD)
3868 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3869 else if (TARGET_DFP)
3870 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3871 else if (TARGET_CMPB)
3872 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3873 else if (TARGET_FPRND)
3874 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3875 else if (TARGET_POPCNTB)
3876 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3877 else if (TARGET_ALTIVEC)
3878 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3880 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3882 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3883 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3884 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3887 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3889 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3890 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3891 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3894 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3896 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3897 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3898 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3901 if (TARGET_P8_VECTOR && !TARGET_VSX)
3903 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3904 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3905 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3906 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3908 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3909 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3910 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3914 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3916 rs6000_isa_flags |= OPTION_MASK_VSX;
3917 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3921 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3923 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3924 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3925 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3928 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3929 silently turn off quad memory mode. */
3930 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3932 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3933 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3935 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3936 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3938 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3939 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3942 /* Non-atomic quad memory load/store are disabled for little endian, since
3943 the words are reversed, but atomic operations can still be done by
3944 swapping the words. */
3945 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3947 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3948 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3951 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3954 /* Assume if the user asked for normal quad memory instructions, they want
3955 the atomic versions as well, unless they explicitly told us not to use quad
3956 word atomic instructions. */
3957 if (TARGET_QUAD_MEMORY
3958 && !TARGET_QUAD_MEMORY_ATOMIC
3959 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3960 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3962 /* If we can shrink-wrap the TOC register save separately, then use
3963 -msave-toc-indirect unless explicitly disabled. */
3964 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3965 && flag_shrink_wrap_separate
3966 && optimize_function_for_speed_p (cfun))
3967 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3969 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3970 generating power8 instructions. Power9 does not optimize power8 fusion
3972 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3974 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3975 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3977 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3980 /* Setting additional fusion flags turns on base fusion. */
3981 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3983 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3985 if (TARGET_P8_FUSION_SIGN)
3986 error ("%qs requires %qs", "-mpower8-fusion-sign",
3989 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3992 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3995 /* Power8 does not fuse sign extended loads with the addis. If we are
3996 optimizing at high levels for speed, convert a sign extended load into a
3997 zero extending load, and an explicit sign extension. */
3998 if (TARGET_P8_FUSION
3999 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4000 && optimize_function_for_speed_p (cfun)
4002 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4004 /* ISA 3.0 vector instructions include ISA 2.07. */
4005 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4007 /* We prefer to not mention undocumented options in
4008 error messages. However, if users have managed to select
4009 power9-vector without selecting power8-vector, they
4010 already know about undocumented flags. */
4011 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4012 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4013 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4014 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4016 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4017 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4018 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4022 /* OPTION_MASK_P9_VECTOR is explicit and
4023 OPTION_MASK_P8_VECTOR is not explicit. */
4024 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4025 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4029 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4030 support. If we only have ISA 2.06 support, and the user did not specify
4031 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4032 but we don't enable the full vectorization support */
4033 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4034 TARGET_ALLOW_MOVMISALIGN = 1;
4036 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4038 if (TARGET_ALLOW_MOVMISALIGN > 0
4039 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4040 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4042 TARGET_ALLOW_MOVMISALIGN = 0;
4045 /* Determine when unaligned vector accesses are permitted, and when
4046 they are preferred over masked Altivec loads. Note that if
4047 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4048 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4050 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4054 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4055 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4057 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4060 else if (!TARGET_ALLOW_MOVMISALIGN)
4062 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4063 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4064 "-mallow-movmisalign");
4066 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4070 /* Use long double size to select the appropriate long double. We use
4071 TYPE_PRECISION to differentiate the 3 different long double types. We map
4072 128 into the precision used for TFmode. */
4073 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4075 : FLOAT_PRECISION_TFmode);
4077 /* Set long double size before the IEEE 128-bit tests. */
4078 if (!global_options_set.x_rs6000_long_double_type_size)
4080 if (main_target_opt != NULL
4081 && (main_target_opt->x_rs6000_long_double_type_size
4082 != default_long_double_size))
4083 error ("target attribute or pragma changes %<long double%> size");
4085 rs6000_long_double_type_size = default_long_double_size;
4087 else if (rs6000_long_double_type_size == 128)
4088 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4089 else if (global_options_set.x_rs6000_ieeequad)
4091 if (global_options.x_rs6000_ieeequad)
4092 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4094 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4097 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4098 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4099 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4100 those systems will not pick up this default. Warn if the user changes the
4101 default unless -Wno-psabi. */
4102 if (!global_options_set.x_rs6000_ieeequad)
4103 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4107 if (global_options.x_rs6000_ieeequad
4108 && (!TARGET_POPCNTD || !TARGET_VSX))
4109 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4111 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4113 static bool warned_change_long_double;
4114 if (!warned_change_long_double)
4116 warned_change_long_double = true;
4117 if (TARGET_IEEEQUAD)
4118 warning (OPT_Wpsabi, "Using IEEE extended precision "
4121 warning (OPT_Wpsabi, "Using IBM extended precision "
4127 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4128 sytems. In GCC 7, we would enable the the IEEE 128-bit floating point
4129 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4130 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4131 the keyword as well as the type. */
4132 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4134 /* IEEE 128-bit floating point requires VSX support. */
4135 if (TARGET_FLOAT128_KEYWORD)
4139 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4140 error ("%qs requires VSX support", "%<-mfloat128%>");
4142 TARGET_FLOAT128_TYPE = 0;
4143 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4144 | OPTION_MASK_FLOAT128_HW);
4146 else if (!TARGET_FLOAT128_TYPE)
4148 TARGET_FLOAT128_TYPE = 1;
4149 warning (0, "The %<-mfloat128%> option may not be fully supported");
4153 /* Enable the __float128 keyword under Linux by default. */
4154 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4155 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4156 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4158 /* If we have are supporting the float128 type and full ISA 3.0 support,
4159 enable -mfloat128-hardware by default. However, don't enable the
4160 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4161 because sometimes the compiler wants to put things in an integer
4162 container, and if we don't have __int128 support, it is impossible. */
4163 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4164 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4165 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4166 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4168 if (TARGET_FLOAT128_HW
4169 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4171 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4172 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4174 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4177 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4179 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4180 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4182 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4185 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4186 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4188 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4189 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4190 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4191 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4193 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4196 /* -mpcrel requires prefixed load/store addressing. */
4197 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4199 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4200 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4202 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4205 /* Print the options after updating the defaults. */
4206 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4207 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4209 /* E500mc does "better" if we inline more aggressively. Respect the
4210 user's opinion, though. */
4211 if (rs6000_block_move_inline_limit == 0
4212 && (rs6000_tune == PROCESSOR_PPCE500MC
4213 || rs6000_tune == PROCESSOR_PPCE500MC64
4214 || rs6000_tune == PROCESSOR_PPCE5500
4215 || rs6000_tune == PROCESSOR_PPCE6500))
4216 rs6000_block_move_inline_limit = 128;
4218 /* store_one_arg depends on expand_block_move to handle at least the
4219 size of reg_parm_stack_space. */
4220 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4221 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4225 /* If the appropriate debug option is enabled, replace the target hooks
4226 with debug versions that call the real version and then prints
4227 debugging information. */
4228 if (TARGET_DEBUG_COST)
4230 targetm.rtx_costs = rs6000_debug_rtx_costs;
4231 targetm.address_cost = rs6000_debug_address_cost;
4232 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4235 if (TARGET_DEBUG_ADDR)
4237 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4238 targetm.legitimize_address = rs6000_debug_legitimize_address;
4239 rs6000_secondary_reload_class_ptr
4240 = rs6000_debug_secondary_reload_class;
4241 targetm.secondary_memory_needed
4242 = rs6000_debug_secondary_memory_needed;
4243 targetm.can_change_mode_class
4244 = rs6000_debug_can_change_mode_class;
4245 rs6000_preferred_reload_class_ptr
4246 = rs6000_debug_preferred_reload_class;
4247 rs6000_mode_dependent_address_ptr
4248 = rs6000_debug_mode_dependent_address;
4251 if (rs6000_veclibabi_name)
4253 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4254 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4257 error ("unknown vectorization library ABI type (%qs) for "
4258 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4264 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4265 target attribute or pragma which automatically enables both options,
4266 unless the altivec ABI was set. This is set by default for 64-bit, but
4268 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4270 TARGET_FLOAT128_TYPE = 0;
4271 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4272 | OPTION_MASK_FLOAT128_KEYWORD)
4273 & ~rs6000_isa_flags_explicit);
4276 /* Enable Altivec ABI for AIX -maltivec. */
4277 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4279 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4280 error ("target attribute or pragma changes AltiVec ABI");
4282 rs6000_altivec_abi = 1;
4285 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4286 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4287 be explicitly overridden in either case. */
4290 if (!global_options_set.x_rs6000_altivec_abi
4291 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4293 if (main_target_opt != NULL &&
4294 !main_target_opt->x_rs6000_altivec_abi)
4295 error ("target attribute or pragma changes AltiVec ABI");
4297 rs6000_altivec_abi = 1;
4301 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4302 So far, the only darwin64 targets are also MACH-O. */
4304 && DEFAULT_ABI == ABI_DARWIN
4307 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4308 error ("target attribute or pragma changes darwin64 ABI");
4311 rs6000_darwin64_abi = 1;
4312 /* Default to natural alignment, for better performance. */
4313 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4317 /* Place FP constants in the constant pool instead of TOC
4318 if section anchors enabled. */
4319 if (flag_section_anchors
4320 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4321 TARGET_NO_FP_IN_TOC = 1;
4323 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4324 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4326 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4327 SUBTARGET_OVERRIDE_OPTIONS;
4329 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4330 SUBSUBTARGET_OVERRIDE_OPTIONS;
4332 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4333 SUB3TARGET_OVERRIDE_OPTIONS;
4336 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4337 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4339 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4340 && rs6000_tune != PROCESSOR_POWER5
4341 && rs6000_tune != PROCESSOR_POWER6
4342 && rs6000_tune != PROCESSOR_POWER7
4343 && rs6000_tune != PROCESSOR_POWER8
4344 && rs6000_tune != PROCESSOR_POWER9
4345 && rs6000_tune != PROCESSOR_FUTURE
4346 && rs6000_tune != PROCESSOR_PPCA2
4347 && rs6000_tune != PROCESSOR_CELL
4348 && rs6000_tune != PROCESSOR_PPC476);
4349 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4350 || rs6000_tune == PROCESSOR_POWER5
4351 || rs6000_tune == PROCESSOR_POWER7
4352 || rs6000_tune == PROCESSOR_POWER8);
4353 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4354 || rs6000_tune == PROCESSOR_POWER5
4355 || rs6000_tune == PROCESSOR_POWER6
4356 || rs6000_tune == PROCESSOR_POWER7
4357 || rs6000_tune == PROCESSOR_POWER8
4358 || rs6000_tune == PROCESSOR_POWER9
4359 || rs6000_tune == PROCESSOR_FUTURE
4360 || rs6000_tune == PROCESSOR_PPCE500MC
4361 || rs6000_tune == PROCESSOR_PPCE500MC64
4362 || rs6000_tune == PROCESSOR_PPCE5500
4363 || rs6000_tune == PROCESSOR_PPCE6500);
4365 /* Allow debug switches to override the above settings. These are set to -1
4366 in rs6000.opt to indicate the user hasn't directly set the switch. */
4367 if (TARGET_ALWAYS_HINT >= 0)
4368 rs6000_always_hint = TARGET_ALWAYS_HINT;
4370 if (TARGET_SCHED_GROUPS >= 0)
4371 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4373 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4374 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4376 rs6000_sched_restricted_insns_priority
4377 = (rs6000_sched_groups ? 1 : 0);
4379 /* Handle -msched-costly-dep option. */
4380 rs6000_sched_costly_dep
4381 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4383 if (rs6000_sched_costly_dep_str)
4385 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4386 rs6000_sched_costly_dep = no_dep_costly;
4387 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4388 rs6000_sched_costly_dep = all_deps_costly;
4389 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4390 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4391 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4392 rs6000_sched_costly_dep = store_to_load_dep_costly;
4394 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4395 atoi (rs6000_sched_costly_dep_str));
4398 /* Handle -minsert-sched-nops option. */
4399 rs6000_sched_insert_nops
4400 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4402 if (rs6000_sched_insert_nops_str)
4404 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4405 rs6000_sched_insert_nops = sched_finish_none;
4406 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4407 rs6000_sched_insert_nops = sched_finish_pad_groups;
4408 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4409 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4411 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4412 atoi (rs6000_sched_insert_nops_str));
4415 /* Handle stack protector */
4416 if (!global_options_set.x_rs6000_stack_protector_guard)
4417 #ifdef TARGET_THREAD_SSP_OFFSET
4418 rs6000_stack_protector_guard = SSP_TLS;
4420 rs6000_stack_protector_guard = SSP_GLOBAL;
4423 #ifdef TARGET_THREAD_SSP_OFFSET
4424 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4425 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4428 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4431 const char *str = rs6000_stack_protector_guard_offset_str;
4434 long offset = strtol (str, &endp, 0);
4435 if (!*str || *endp || errno)
4436 error ("%qs is not a valid number in %qs", str,
4437 "-mstack-protector-guard-offset=");
4439 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4440 || (TARGET_64BIT && (offset & 3)))
4441 error ("%qs is not a valid offset in %qs", str,
4442 "-mstack-protector-guard-offset=");
4444 rs6000_stack_protector_guard_offset = offset;
4447 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4449 const char *str = rs6000_stack_protector_guard_reg_str;
4450 int reg = decode_reg_name (str);
4452 if (!IN_RANGE (reg, 1, 31))
4453 error ("%qs is not a valid base register in %qs", str,
4454 "-mstack-protector-guard-reg=");
4456 rs6000_stack_protector_guard_reg = reg;
4459 if (rs6000_stack_protector_guard == SSP_TLS
4460 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4461 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4465 #ifdef TARGET_REGNAMES
4466 /* If the user desires alternate register names, copy in the
4467 alternate names now. */
4468 if (TARGET_REGNAMES)
4469 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4472 /* Set aix_struct_return last, after the ABI is determined.
4473 If -maix-struct-return or -msvr4-struct-return was explicitly
4474 used, don't override with the ABI default. */
4475 if (!global_options_set.x_aix_struct_return)
4476 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4479 /* IBM XL compiler defaults to unsigned bitfields. */
4480 if (TARGET_XL_COMPAT)
4481 flag_signed_bitfields = 0;
4484 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4485 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4487 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4489 /* We can only guarantee the availability of DI pseudo-ops when
4490 assembling for 64-bit targets. */
4493 targetm.asm_out.aligned_op.di = NULL;
4494 targetm.asm_out.unaligned_op.di = NULL;
4498 /* Set branch target alignment, if not optimizing for size. */
4501 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4502 aligned 8byte to avoid misprediction by the branch predictor. */
4503 if (rs6000_tune == PROCESSOR_TITAN
4504 || rs6000_tune == PROCESSOR_CELL)
4506 if (flag_align_functions && !str_align_functions)
4507 str_align_functions = "8";
4508 if (flag_align_jumps && !str_align_jumps)
4509 str_align_jumps = "8";
4510 if (flag_align_loops && !str_align_loops)
4511 str_align_loops = "8";
4513 if (rs6000_align_branch_targets)
4515 if (flag_align_functions && !str_align_functions)
4516 str_align_functions = "16";
4517 if (flag_align_jumps && !str_align_jumps)
4518 str_align_jumps = "16";
4519 if (flag_align_loops && !str_align_loops)
4521 can_override_loop_align = 1;
4522 str_align_loops = "16";
4526 if (flag_align_jumps && !str_align_jumps)
4527 str_align_jumps = "16";
4528 if (flag_align_loops && !str_align_loops)
4529 str_align_loops = "16";
4532 /* Arrange to save and restore machine status around nested functions. */
4533 init_machine_status = rs6000_init_machine_status;
4535 /* We should always be splitting complex arguments, but we can't break
4536 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4537 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4538 targetm.calls.split_complex_arg = NULL;
4540 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4541 if (DEFAULT_ABI == ABI_AIX)
4542 targetm.calls.custom_function_descriptors = 0;
4545 /* Initialize rs6000_cost with the appropriate target costs. */
4547 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4549 switch (rs6000_tune)
4551 case PROCESSOR_RS64A:
4552 rs6000_cost = &rs64a_cost;
4555 case PROCESSOR_MPCCORE:
4556 rs6000_cost = &mpccore_cost;
4559 case PROCESSOR_PPC403:
4560 rs6000_cost = &ppc403_cost;
4563 case PROCESSOR_PPC405:
4564 rs6000_cost = &ppc405_cost;
4567 case PROCESSOR_PPC440:
4568 rs6000_cost = &ppc440_cost;
4571 case PROCESSOR_PPC476:
4572 rs6000_cost = &ppc476_cost;
4575 case PROCESSOR_PPC601:
4576 rs6000_cost = &ppc601_cost;
4579 case PROCESSOR_PPC603:
4580 rs6000_cost = &ppc603_cost;
4583 case PROCESSOR_PPC604:
4584 rs6000_cost = &ppc604_cost;
4587 case PROCESSOR_PPC604e:
4588 rs6000_cost = &ppc604e_cost;
4591 case PROCESSOR_PPC620:
4592 rs6000_cost = &ppc620_cost;
4595 case PROCESSOR_PPC630:
4596 rs6000_cost = &ppc630_cost;
4599 case PROCESSOR_CELL:
4600 rs6000_cost = &ppccell_cost;
4603 case PROCESSOR_PPC750:
4604 case PROCESSOR_PPC7400:
4605 rs6000_cost = &ppc750_cost;
4608 case PROCESSOR_PPC7450:
4609 rs6000_cost = &ppc7450_cost;
4612 case PROCESSOR_PPC8540:
4613 case PROCESSOR_PPC8548:
4614 rs6000_cost = &ppc8540_cost;
4617 case PROCESSOR_PPCE300C2:
4618 case PROCESSOR_PPCE300C3:
4619 rs6000_cost = &ppce300c2c3_cost;
4622 case PROCESSOR_PPCE500MC:
4623 rs6000_cost = &ppce500mc_cost;
4626 case PROCESSOR_PPCE500MC64:
4627 rs6000_cost = &ppce500mc64_cost;
4630 case PROCESSOR_PPCE5500:
4631 rs6000_cost = &ppce5500_cost;
4634 case PROCESSOR_PPCE6500:
4635 rs6000_cost = &ppce6500_cost;
4638 case PROCESSOR_TITAN:
4639 rs6000_cost = &titan_cost;
4642 case PROCESSOR_POWER4:
4643 case PROCESSOR_POWER5:
4644 rs6000_cost = &power4_cost;
4647 case PROCESSOR_POWER6:
4648 rs6000_cost = &power6_cost;
4651 case PROCESSOR_POWER7:
4652 rs6000_cost = &power7_cost;
4655 case PROCESSOR_POWER8:
4656 rs6000_cost = &power8_cost;
4659 case PROCESSOR_POWER9:
4660 case PROCESSOR_FUTURE:
4661 rs6000_cost = &power9_cost;
4664 case PROCESSOR_PPCA2:
4665 rs6000_cost = &ppca2_cost;
4674 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4675 rs6000_cost->simultaneous_prefetches,
4676 global_options.x_param_values,
4677 global_options_set.x_param_values);
4678 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4679 global_options.x_param_values,
4680 global_options_set.x_param_values);
4681 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4682 rs6000_cost->cache_line_size,
4683 global_options.x_param_values,
4684 global_options_set.x_param_values);
4685 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4686 global_options.x_param_values,
4687 global_options_set.x_param_values);
4689 /* Increase loop peeling limits based on performance analysis. */
4690 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4691 global_options.x_param_values,
4692 global_options_set.x_param_values);
4693 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4694 global_options.x_param_values,
4695 global_options_set.x_param_values);
4697 /* Use the 'model' -fsched-pressure algorithm by default. */
4698 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4699 SCHED_PRESSURE_MODEL,
4700 global_options.x_param_values,
4701 global_options_set.x_param_values);
4703 /* If using typedef char *va_list, signal that
4704 __builtin_va_start (&ap, 0) can be optimized to
4705 ap = __builtin_next_arg (0). */
4706 if (DEFAULT_ABI != ABI_V4)
4707 targetm.expand_builtin_va_start = NULL;
4710 /* If not explicitly specified via option, decide whether to generate indexed
4711 load/store instructions. A value of -1 indicates that the
4712 initial value of this variable has not been overwritten. During
4713 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4714 if (TARGET_AVOID_XFORM == -1)
4715 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4716 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4717 need indexed accesses and the type used is the scalar type of the element
4718 being loaded or stored. */
4719 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4720 && !TARGET_ALTIVEC);
4722 /* Set the -mrecip options. */
4723 if (rs6000_recip_name)
4725 char *p = ASTRDUP (rs6000_recip_name);
4727 unsigned int mask, i;
4730 while ((q = strtok (p, ",")) != NULL)
4741 if (!strcmp (q, "default"))
4742 mask = ((TARGET_RECIP_PRECISION)
4743 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4746 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4747 if (!strcmp (q, recip_options[i].string))
4749 mask = recip_options[i].mask;
4753 if (i == ARRAY_SIZE (recip_options))
4755 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4763 rs6000_recip_control &= ~mask;
4765 rs6000_recip_control |= mask;
4769 /* Set the builtin mask of the various options used that could affect which
4770 builtins were used. In the past we used target_flags, but we've run out
4771 of bits, and some options are no longer in target_flags. */
4772 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4773 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4774 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4775 rs6000_builtin_mask);
4777 /* Initialize all of the registers. */
4778 rs6000_init_hard_regno_mode_ok (global_init_p);
4780 /* Save the initial options in case the user does function specific options */
4782 target_option_default_node = target_option_current_node
4783 = build_target_option_node (&global_options);
4785 /* If not explicitly specified via option, decide whether to generate the
4786 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4787 if (TARGET_LINK_STACK == -1)
4788 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4790 /* Deprecate use of -mno-speculate-indirect-jumps. */
4791 if (!rs6000_speculate_indirect_jumps)
4792 warning (0, "%qs is deprecated and not recommended in any circumstances",
4793 "-mno-speculate-indirect-jumps");
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  /* "true" selects the global (command-line) pass of option processing,
     as opposed to the target attribute/pragma pass; the boolean result
     is deliberately discarded here.  */
  (void) rs6000_option_override_internal (true);
}
/* Implement targetm.vectorize.builtin_mask_for_load.
   Return the decl of the Altivec lvsl-based mask builtin when the target
   needs the lvsl/vperm realignment sequence, or 0 when unaligned vector
   accesses are handled directly.  */

static tree
rs6000_builtin_mask_for_load (void)
{
  /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
  if ((TARGET_ALTIVEC && !TARGET_VSX)
      || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
    return altivec_builtin_mask_for_load;
  else
    return 0;
}
/* Implement LOOP_ALIGN.  LABEL is the loop-head label whose alignment is
   being chosen.  Returns the requested alignment (log2 form via
   align_flags), or the default alignment when no override applies.  */

align_flags
rs6000_loop_align (rtx label)
{
  basic_block bb;
  int ninsns;

  /* Don't override loop alignment if -falign-loops was specified.  */
  if (!can_override_loop_align)
    return align_flags ();

  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);

  /* Align small loops to 32 bytes to fit in an icache sector, otherwise
     return default.  */
  if (ninsns > 4 && ninsns <= 8
      && (rs6000_tune == PROCESSOR_POWER4
	  || rs6000_tune == PROCESSOR_POWER5
	  || rs6000_tune == PROCESSOR_POWER6
	  || rs6000_tune == PROCESSOR_POWER7
	  || rs6000_tune == PROCESSOR_POWER8))
    return align_flags (5);
  else
    return align_flags ();
}
/* Return true iff, data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how may iterations are required to reach desired alignment.

   NOTE(review): interior branch bodies were elided in this listing and are
   restored here from the upstream source — verify against the repository
   copy.  */

static bool
rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  /* Packed data can never reach 16-byte alignment by peeling.  */
  if (is_packed)
    return false;

  if (TARGET_32BIT)
    {
      if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
	return true;

      if (rs6000_alignment_flags == MASK_ALIGN_POWER)
	return true;

      return false;
    }
  else
    {
      if (TARGET_MACHO)
	return false;

      /* Assuming that all other types are naturally aligned.  CHECKME!  */
      return true;
    }
}
/* Return true if the vector misalignment factor is supported by the
   target.  MODE is the vector mode, TYPE the scalar element type,
   MISALIGNMENT the byte misalignment (-1 when unknown at compile time),
   and IS_PACKED whether the access is to packed data.  */

static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
					    const_tree type,
					    int misalignment,
					    bool is_packed)
{
  if (TARGET_VSX)
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return true;

      /* Return if movmisalign pattern is not supported for this mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;

      if (misalignment == -1)
	{
	  /* Misalignment factor is unknown at compile time but we know
	     it's word aligned.  */
	  if (rs6000_vector_alignment_reachable (type, is_packed))
	    {
	      int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));

	      if (element_size == 64 || element_size == 32)
		return true;
	    }

	  return false;
	}

      /* VSX supports word-aligned vector.  */
      if (misalignment % 4 == 0)
	return true;
    }
  return false;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.
   Return the cost (in abstract units, scalar statement == 1) of one
   vectorizer statement of kind TYPE_OF_COST operating on VECTYPE with
   byte misalignment MISALIGN (-1 when unknown).

   NOTE(review): several elided lines (case labels and cost constants)
   are restored here from the upstream source — verify the exact
   constants against the repository copy.  */

static int
rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				   tree vectype, int misalign)
{
  unsigned elements;
  tree elem_type;

  switch (type_of_cost)
    {
      case scalar_stmt:
      case scalar_load:
      case scalar_store:
      case vector_stmt:
      case vector_load:
      case vector_store:
      case vec_to_scalar:
      case scalar_to_vec:
      case cond_branch_not_taken:
        return 1;

      case vec_perm:
	if (TARGET_VSX)
	  return 3;
	else
	  return 1;

      case vec_promote_demote:
        if (TARGET_VSX)
          return 4;
        else
          return 1;

      case cond_branch_taken:
        return 3;

      case unaligned_load:
      case vector_gather_load:
	if (TARGET_EFFICIENT_UNALIGNED_VSX)
	  return 1;

	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	  {
	    elements = TYPE_VECTOR_SUBPARTS (vectype);
	    if (elements == 2)
	      /* Double word aligned.  */
	      return 2;

	    if (elements == 4)
	      {
		switch (misalign)
		  {
		    case 8:
		      /* Double word aligned.  */
		      return 2;

		    case -1:
		      /* Unknown misalignment.  */
		    case 4:
		    case 12:
		      /* Word aligned.  */
		      return 22;

		    default:
		      gcc_unreachable ();
		  }
	      }
	  }

	if (TARGET_ALTIVEC)
	  /* Misaligned loads are not supported.  */
	  gcc_unreachable ();

	return 2;

      case unaligned_store:
      case vector_scatter_store:
	if (TARGET_EFFICIENT_UNALIGNED_VSX)
	  return 1;

	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	  {
	    elements = TYPE_VECTOR_SUBPARTS (vectype);
	    if (elements == 2)
	      /* Double word aligned.  */
	      return 2;

	    if (elements == 4)
	      {
		switch (misalign)
		  {
		    case 8:
		      /* Double word aligned.  */
		      return 2;

		    case -1:
		      /* Unknown misalignment.  */
		    case 4:
		    case 12:
		      /* Word aligned.  */
		      return 23;

		    default:
		      gcc_unreachable ();
		  }
	      }
	  }

	if (TARGET_ALTIVEC)
	  /* Misaligned stores are not supported.  */
	  gcc_unreachable ();

	return 2;

      case vec_construct:
	/* This is a rough approximation assuming non-constant elements
	   constructed into a vector via element insertion.  FIXME:
	   vec_construct is not granular enough for uniformly good
	   decisions.  If the initialization is a splat, this is
	   cheaper than we estimate.  Improve this someday.  */
	elem_type = TREE_TYPE (vectype);
	/* 32-bit vectors loaded into registers are stored as double
	   precision, so we need 2 permutes, 2 converts, and 1 merge
	   to construct a vector of short floats from them.  */
	if (SCALAR_FLOAT_TYPE_P (elem_type)
	    && TYPE_PRECISION (elem_type) == 32)
	  return 5;
	/* On POWER9, integer vector types are built up in GPRs and then
	   use a direct move (2 cycles).  For POWER8 this is even worse,
	   as we need two direct moves and a merge, and the direct moves
	   take 5 cycles.  */
	else if (INTEGRAL_TYPE_P (elem_type))
	  {
	    if (TARGET_P9_VECTOR)
	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
	    else
	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
	  }
	else
	  /* V2DFmode doesn't need a direct move.  */
	  return 2;

      default:
        gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.preferred_simd_mode.
   Map scalar MODE to the 16-byte vector mode the vectorizer should
   prefer, or word_mode when no vector unit handles it.

   NOTE(review): the switch bodies were elided in this listing and are
   restored from the upstream source — verify against the repository
   copy.  */

static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
  /* V2DF requires VSX; the remaining modes only need Altivec.  */
  if (TARGET_VSX)
    switch (mode)
      {
      case E_DFmode:
	return V2DFmode;
      default:;
      }
  if (TARGET_ALTIVEC || TARGET_VSX)
    switch (mode)
      {
      case E_SFmode:
	return V4SFmode;
      case E_TImode:
	return V1TImode;
      case E_DImode:
	return V2DImode;
      case E_SImode:
	return V4SImode;
      case E_HImode:
	return V8HImode;
      case E_QImode:
	return V16QImode;
      default:;
      }
  return word_mode;
}
/* Per-loop (or per-block) cost state threaded through the vectorizer
   cost hooks (rs6000_init_cost .. rs6000_destroy_cost_data).  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;	/* Loop being vectorized; NULL for a block.  */
  unsigned cost[3];		/* Accumulated prologue/body/epilogue costs,
				   indexed by vect_cost_model_location.  */
} rs6000_cost_data;
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */

static void
rs6000_density_test (rs6000_cost_data *data)
{
  const int DENSITY_PCT_THRESHOLD = 85;
  const int DENSITY_SIZE_THRESHOLD = 70;
  const int DENSITY_PENALTY = 10;
  struct loop *loop = data->loop_info;
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
  int vec_cost = data->cost[vect_body], not_vec_cost = 0;
  int i, density_pct;

  /* Count the statements the vectorizer will leave scalar; everything
     already accounted in vec_cost is considered vectorized.  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
	    not_vec_cost++;
	}
    }

  free (bbs);
  density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > DENSITY_PCT_THRESHOLD
      && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
    {
      data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "density %d%%, cost %d exceeds threshold, penalizing "
			 "loop body cost by %d%%", density_pct,
			 vec_cost + not_vec_cost, DENSITY_PENALTY);
    }
}
/* Implement targetm.vectorize.init_cost.  */

/* For each vectorized loop, this var holds TRUE iff a non-memory vector
   instruction is needed by the vectorization.  */
static bool rs6000_vect_nonmem;

/* Allocate and zero a fresh cost-data record for LOOP_INFO (NULL when
   costing a basic block).  Ownership passes to the vectorizer, which
   hands it back to rs6000_destroy_cost_data.  */
static void *
rs6000_init_cost (struct loop *loop_info)
{
  rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
  data->loop_info = loop_info;
  data->cost[vect_prologue] = 0;
  data->cost[vect_body] = 0;
  data->cost[vect_epilogue] = 0;
  rs6000_vect_nonmem = false;
  return data;
}
/* Implement targetm.vectorize.add_stmt_cost.
   Add COUNT copies of a statement of kind KIND (described by STMT_INFO,
   with misalignment MISALIGN) to the WHERE bucket of DATA.  Returns the
   cost that was added.  */

static unsigned
rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		      struct _stmt_vec_info *stmt_info, int misalign,
		      enum vect_cost_model_location where)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
							 misalign);
      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost_data->cost[where] += retval;

      /* Check whether we're doing something other than just a copy loop.
	 Not all such loops may be profitably vectorized; see
	 rs6000_finish_cost.  */
      if ((kind == vec_to_scalar || kind == vec_perm
	   || kind == vec_promote_demote || kind == vec_construct
	   || kind == scalar_to_vec)
	  || (where == vect_body && kind == vector_stmt))
	rs6000_vect_nonmem = true;
    }

  return retval;
}
/* Implement targetm.vectorize.finish_cost.
   Apply the density penalty and the copy-loop penalty, then copy the
   accumulated prologue/body/epilogue costs out of DATA into the three
   output parameters.  */

static void
rs6000_finish_cost (void *data, unsigned *prologue_cost,
		    unsigned *body_cost, unsigned *epilogue_cost)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data*) data;

  if (cost_data->loop_info)
    rs6000_density_test (cost_data);

  /* Don't vectorize minimum-vectorization-factor, simple copy loops
     that require versioning for any reason.  The vectorization is at
     best a wash inside the loop, and the versioning checks make
     profitability highly unlikely and potentially quite harmful.  */
  if (cost_data->loop_info)
    {
      loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
      if (!rs6000_vect_nonmem
	  && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
	  && LOOP_REQUIRES_VERSIONING (vec_info))
	/* An effectively infinite body cost vetoes the vectorization.  */
	cost_data->cost[vect_body] += 10000;
    }

  *prologue_cost = cost_data->cost[vect_prologue];
  *body_cost = cost_data->cost[vect_body];
  *epilogue_cost = cost_data->cost[vect_epilogue];
}
/* Implement targetm.vectorize.destroy_cost_data.
   Release the record allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  FN is the math builtin, TYPE_OUT
   the vector result type and TYPE_IN the vector argument type; returns a
   FUNCTION_DECL for the corresponding MASS routine (e.g. pow -> powd2),
   or NULL_TREE when no suitable routine exists.

   NOTE(review): the elided switch over FN is restored here from the
   upstream source — verify the case list against the repository copy.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_ATAN2:
    CASE_CFN_HYPOT:
    CASE_CFN_POW:
      n_args = 2;
      gcc_fallthrough ();

    CASE_CFN_ACOS:
    CASE_CFN_ACOSH:
    CASE_CFN_ASIN:
    CASE_CFN_ASINH:
    CASE_CFN_ATAN:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_COS:
    CASE_CFN_COSH:
    CASE_CFN_ERF:
    CASE_CFN_ERFC:
    CASE_CFN_EXP2:
    CASE_CFN_EXP:
    CASE_CFN_EXPM1:
    CASE_CFN_LGAMMA:
    CASE_CFN_LOG10:
    CASE_CFN_LOG1P:
    CASE_CFN_LOG2:
    CASE_CFN_LOG:
    CASE_CFN_SIN:
    CASE_CFN_SINH:
    CASE_CFN_SQRT:
    CASE_CFN_TAN:
    CASE_CFN_TANH:
      if (el_mode == DFmode && n == 2)
	{
	  bdecl = mathfn_built_in (double_type_node, fn);
	  suffix = "d2";				/* pow -> powd2 */
	}
      else if (el_mode == SFmode && n == 4)
	{
	  bdecl = mathfn_built_in (float_type_node, fn);
	  suffix = "4";					/* powf -> powf4 */
	}
      else
	return NULL_TREE;

      if (!bdecl)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
  if (!bname)
    return NULL_TREE;

  /* Derive the MASS routine name from the builtin name, e.g.
     "__builtin_pow" + "d2" -> "powd2".  */
  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
5350 /* Returns a function decl for a vectorized version of the builtin function
5351 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5352 if it is not available. */
/* NOTE(review): this listing elides the switch statement's case labels
   (e.g. between lines 5374/5379, 5390/5393, ...); each group of `if`
   chains below belongs to one elided CASE_CFN_* label.  */
5355 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5358 machine_mode in_mode, out_mode;
5361 if (TARGET_DEBUG_BUILTIN)
5362 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5363 combined_fn_name (combined_fn (fn)),
5364 GET_MODE_NAME (TYPE_MODE (type_out)),
5365 GET_MODE_NAME (TYPE_MODE (type_in)));
/* Only vector-to-vector mappings are handled.  */
5367 if (TREE_CODE (type_out) != VECTOR_TYPE
5368 || TREE_CODE (type_in) != VECTOR_TYPE)
5371 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5372 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5373 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5374 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* copysign: VSX 2xDF / 4xSF, or AltiVec 4xSF.  */
5379 if (VECTOR_UNIT_VSX_P (V2DFmode)
5380 && out_mode == DFmode && out_n == 2
5381 && in_mode == DFmode && in_n == 2)
5382 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5383 if (VECTOR_UNIT_VSX_P (V4SFmode)
5384 && out_mode == SFmode && out_n == 4
5385 && in_mode == SFmode && in_n == 4)
5386 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5387 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5388 && out_mode == SFmode && out_n == 4
5389 && in_mode == SFmode && in_n == 4)
5390 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
/* ceil: round toward +infinity (xvrdpip/xvrspip/vrfip).  */
5393 if (VECTOR_UNIT_VSX_P (V2DFmode)
5394 && out_mode == DFmode && out_n == 2
5395 && in_mode == DFmode && in_n == 2)
5396 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5397 if (VECTOR_UNIT_VSX_P (V4SFmode)
5398 && out_mode == SFmode && out_n == 4
5399 && in_mode == SFmode && in_n == 4)
5400 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5401 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5402 && out_mode == SFmode && out_n == 4
5403 && in_mode == SFmode && in_n == 4)
5404 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
/* floor: round toward -infinity (xvrdpim/xvrspim/vrfim).  */
5407 if (VECTOR_UNIT_VSX_P (V2DFmode)
5408 && out_mode == DFmode && out_n == 2
5409 && in_mode == DFmode && in_n == 2)
5410 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5411 if (VECTOR_UNIT_VSX_P (V4SFmode)
5412 && out_mode == SFmode && out_n == 4
5413 && in_mode == SFmode && in_n == 4)
5414 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5415 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5416 && out_mode == SFmode && out_n == 4
5417 && in_mode == SFmode && in_n == 4)
5418 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
/* fma: fused multiply-add (xvmadddp/xvmaddsp/vmaddfp).  */
5421 if (VECTOR_UNIT_VSX_P (V2DFmode)
5422 && out_mode == DFmode && out_n == 2
5423 && in_mode == DFmode && in_n == 2)
5424 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5425 if (VECTOR_UNIT_VSX_P (V4SFmode)
5426 && out_mode == SFmode && out_n == 4
5427 && in_mode == SFmode && in_n == 4)
5428 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5429 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5430 && out_mode == SFmode && out_n == 4
5431 && in_mode == SFmode && in_n == 4)
5432 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
/* trunc: round toward zero (xvrdpiz/xvrspiz/vrfiz).  */
5435 if (VECTOR_UNIT_VSX_P (V2DFmode)
5436 && out_mode == DFmode && out_n == 2
5437 && in_mode == DFmode && in_n == 2)
5438 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5439 if (VECTOR_UNIT_VSX_P (V4SFmode)
5440 && out_mode == SFmode && out_n == 4
5441 && in_mode == SFmode && in_n == 4)
5442 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5443 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5444 && out_mode == SFmode && out_n == 4
5445 && in_mode == SFmode && in_n == 4)
5446 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
/* Round-to-nearest, only valid under -funsafe-math-optimizations
   (xvrdpi/xvrspi round differently from the C semantics).  */
5449 if (VECTOR_UNIT_VSX_P (V2DFmode)
5450 && flag_unsafe_math_optimizations
5451 && out_mode == DFmode && out_n == 2
5452 && in_mode == DFmode && in_n == 2)
5453 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5454 if (VECTOR_UNIT_VSX_P (V4SFmode)
5455 && flag_unsafe_math_optimizations
5456 && out_mode == SFmode && out_n == 4
5457 && in_mode == SFmode && in_n == 4)
5458 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
/* Round-using-current-mode, disallowed when trapping math is on
   (xvrdpic/xvrspic may raise inexact).  */
5461 if (VECTOR_UNIT_VSX_P (V2DFmode)
5462 && !flag_trapping_math
5463 && out_mode == DFmode && out_n == 2
5464 && in_mode == DFmode && in_n == 2)
5465 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5466 if (VECTOR_UNIT_VSX_P (V4SFmode)
5467 && !flag_trapping_math
5468 && out_mode == SFmode && out_n == 4
5469 && in_mode == SFmode && in_n == 4)
5470 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5476 /* Generate calls to libmass if appropriate. */
5477 if (rs6000_veclib_handler)
5478 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5483 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
/* NOTE(review): return type, braces, the `switch (fn)` line's body
   braces and fall-through `break`/default lines are elided in this
   listing.  */
5486 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5489 machine_mode in_mode, out_mode;
5492 if (TARGET_DEBUG_BUILTIN)
5493 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5494 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5495 GET_MODE_NAME (TYPE_MODE (type_out)),
5496 GET_MODE_NAME (TYPE_MODE (type_in)));
5498 if (TREE_CODE (type_out) != VECTOR_TYPE
5499 || TREE_CODE (type_in) != VECTOR_TYPE)
5502 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5503 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5504 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5505 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5507 enum rs6000_builtins fn
5508 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
/* Map the scalar reciprocal/rsqrt estimate builtins to their vector
   counterparts when the matching vector unit is available.  */
5511 case RS6000_BUILTIN_RSQRTF:
5512 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5513 && out_mode == SFmode && out_n == 4
5514 && in_mode == SFmode && in_n == 4)
5515 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5517 case RS6000_BUILTIN_RSQRT:
5518 if (VECTOR_UNIT_VSX_P (V2DFmode)
5519 && out_mode == DFmode && out_n == 2
5520 && in_mode == DFmode && in_n == 2)
5521 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5523 case RS6000_BUILTIN_RECIPF:
5524 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5525 && out_mode == SFmode && out_n == 4
5526 && in_mode == SFmode && in_n == 4)
5527 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5529 case RS6000_BUILTIN_RECIP:
5530 if (VECTOR_UNIT_VSX_P (V2DFmode)
5531 && out_mode == DFmode && out_n == 2
5532 && in_mode == DFmode && in_n == 2)
5533 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5541 /* Default CPU string for rs6000*_file_start functions. */
/* Set from TARGET_CPU_DEFAULT in rs6000_file_start below.  */
5542 static const char *rs6000_default_cpu;
5544 #ifdef USING_ELFOS_H
/* The ".machine" string selected for the current ISA flags.  */
5545 const char *rs6000_machine;
/* NOTE(review): the return type and the string returned by each branch
   (e.g. the per-ISA ".machine" names) are elided in this listing.
   Each test checks for flag bits NEW to that ISA level relative to the
   previous one, from newest ISA down to oldest.  */
5548 rs6000_machine_from_flags (void)
5550 HOST_WIDE_INT flags = rs6000_isa_flags;
5552 /* Disable the flags that should never influence the .machine selection. */
5553 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5555 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5557 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5559 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5561 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5563 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5565 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5567 if ((flags & ISA_2_1_MASKS) != 0)
5569 if ((flags & OPTION_MASK_POWERPC64) != 0)
/* Emit a ".machine" pseudo-op naming the selected ISA level.  */
5575 emit_asm_machine (void)
5577 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5581 /* Do anything needed at the start of the asm file. */
/* NOTE(review): braces, the `buffer` declaration and several `start = "";`
   reset lines are elided in this listing.  START holds the comment
   prefix for the first option printed and is cleared afterwards so
   subsequent options continue on the same comment line.  */
5584 rs6000_file_start (void)
5587 const char *start = buffer;
5588 FILE *file = asm_out_file;
5590 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5592 default_file_start ();
/* With -fverbose-asm, record the configure/command-line options that
   affect code generation as an assembler comment.  */
5594 if (flag_verbose_asm)
5596 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5598 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5600 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5604 if (global_options_set.x_rs6000_cpu_index)
5606 fprintf (file, "%s -mcpu=%s", start,
5607 processor_target_table[rs6000_cpu_index].name);
5611 if (global_options_set.x_rs6000_tune_index)
5613 fprintf (file, "%s -mtune=%s", start,
5614 processor_target_table[rs6000_tune_index].name);
5618 if (PPC405_ERRATUM77)
5620 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5624 #ifdef USING_ELFOS_H
5625 switch (rs6000_sdata)
5627 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5628 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5629 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5630 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5633 if (rs6000_sdata && g_switch_value)
5635 fprintf (file, "%s -G %d", start,
5645 #ifdef USING_ELFOS_H
/* Emit ".machine" now unless a default cpu was configured or -mcpu=
   was given (in those cases the assembler directive is handled
   elsewhere).  */
5646 rs6000_machine = rs6000_machine_from_flags ();
5647 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5648 && !global_options_set.x_rs6000_cpu_index)
5649 emit_asm_machine ();
5652 if (DEFAULT_ABI == ABI_ELFv2)
5653 fprintf (file, "\t.abiversion 2\n");
5657 /* Return nonzero if this function is known to have a null epilogue. */
/* NOTE(review): return type, braces and the final `return` lines are
   elided here.  True only after reload, when the computed stack frame
   saves no GPRs/FPRs/Altivec regs, LR, CR or VRSAVE.  */
5660 direct_return (void)
5662 if (reload_completed)
5664 rs6000_stack_t *info = rs6000_stack_info ();
5666 if (info->first_gp_reg_save == 32
5667 && info->first_fp_reg_save == 64
5668 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5669 && ! info->lr_save_p
5670 && ! info->cr_save_p
5671 && info->vrsave_size == 0
5679 /* Helper for num_insns_constant. Calculate number of instructions to
5680 load VALUE to a single gpr using combinations of addi, addis, ori,
5681 oris and sldi instructions. */
/* NOTE(review): the `return N;` lines under each branch are elided in
   this listing.  */
5684 num_insns_constant_gpr (HOST_WIDE_INT value)
5686 /* signed constant loadable with addi */
5687 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5690 /* constant loadable with addis */
5691 else if ((value & 0xffff) == 0
5692 && (value >> 31 == -1 || value >> 31 == 0))
5695 else if (TARGET_POWERPC64)
/* 64-bit case: split VALUE into a sign-extended low 32 bits and the
   remaining high part, and recurse on each half.  */
5697 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5698 HOST_WIDE_INT high = value >> 31;
5700 if (high == 0 || high == -1)
5706 return num_insns_constant_gpr (high) + 1;
5708 return num_insns_constant_gpr (low) + 1;
5710 return (num_insns_constant_gpr (high)
5711 + num_insns_constant_gpr (low) + 1);
5718 /* Helper for num_insns_constant. Allow constants formed by the
5719 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5720 and handle modes that require multiple gprs. */
/* NOTE(review): the loop header over NREGS words, the accumulation of
   `insns`, and the final return are elided in this listing.  */
5723 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
/* Number of GPR-sized words needed to hold MODE.  */
5725 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5729 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5730 int insns = num_insns_constant_gpr (low);
5732 /* We won't get more than 2 from num_insns_constant_gpr
5733 except when TARGET_POWERPC64 and mode is DImode or
5734 wider, so the register mode must be DImode. */
5735 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
/* Move on to the next word of the constant.  */
5738 value >>= BITS_PER_WORD;
5743 /* Return the number of instructions it takes to form a constant in as
5744 many gprs are needed for MODE. */
/* NOTE(review): braces, the CONST_INT case, `gcc_unreachable` default
   and some declarations are elided in this listing.  */
5747 num_insns_constant (rtx op, machine_mode mode)
5751 switch (GET_CODE (op))
5757 case CONST_WIDE_INT:
/* Sum the cost of materialising each 64-bit chunk.  */
5760 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5761 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5768 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
/* Floating-point constants are converted to their target image and
   costed as the equivalent integer bit patterns.  */
5770 if (mode == SFmode || mode == SDmode)
5775 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5777 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5778 /* See the first define_split in rs6000.md handling a
5779 const_double_operand. */
5783 else if (mode == DFmode || mode == DDmode)
5788 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5790 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5792 /* See the second (32-bit) and third (64-bit) define_split
5793 in rs6000.md handling a const_double_operand. */
5794 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5795 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5798 else if (mode == TFmode || mode == TDmode
5799 || mode == KFmode || mode == IFmode)
5805 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5807 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
/* 128-bit values: cost the two 64-bit halves separately.  */
5809 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5810 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5811 insns = num_insns_constant_multi (val, DImode);
5812 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5813 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5814 insns += num_insns_constant_multi (val, DImode);
5826 return num_insns_constant_multi (val, mode);
5829 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5830 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5831 corresponding element of the vector, but for V4SFmode, the
5832 corresponding "float" is interpreted as an SImode integer. */
/* NOTE(review): return type and braces are elided in this listing.  */
5835 const_vector_elt_as_int (rtx op, unsigned int elt)
5839 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5840 gcc_assert (GET_MODE (op) != V2DImode
5841 && GET_MODE (op) != V2DFmode);
5843 tmp = CONST_VECTOR_ELT (op, elt);
/* Reinterpret the float's bit pattern as an SImode integer.  */
5844 if (GET_MODE (op) == V4SFmode)
5845 tmp = gen_lowpart (SImode, tmp);
5846 return INTVAL (tmp);
5849 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5850 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5851 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5852 all items are set to the same value and contain COPIES replicas of the
5853 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5854 operand and the others are set to the value of the operand's msb. */
/* NOTE(review): braces, some declarations, the `return false` lines and
   the final `return true` are elided in this listing.  */
5857 vspltis_constant (rtx op, unsigned step, unsigned copies)
5859 machine_mode mode = GET_MODE (op);
5860 machine_mode inner = GET_MODE_INNER (mode);
5868 HOST_WIDE_INT splat_val;
5869 HOST_WIDE_INT msb_val;
/* Doubleword/quadword element vectors are not handled here.  */
5871 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5874 nunits = GET_MODE_NUNITS (mode);
5875 bitsize = GET_MODE_BITSIZE (inner);
5876 mask = GET_MODE_MASK (inner);
/* The splat candidate is the last element in memory order.  */
5878 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5880 msb_val = val >= 0 ? 0 : -1;
5882 /* Construct the value to be splatted, if possible. If not, return 0. */
5883 for (i = 2; i <= copies; i *= 2)
5885 HOST_WIDE_INT small_val;
/* Halve the replica: SPLAT_VAL must be SMALL_VAL duplicated.  */
5887 small_val = splat_val >> bitsize;
5889 if (splat_val != ((HOST_WIDE_INT)
5890 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5891 | (small_val & mask)))
5893 splat_val = small_val;
5896 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5897 if (EASY_VECTOR_15 (splat_val))
5900 /* Also check if we can splat, and then add the result to itself. Do so if
5901 the value is positive, of if the splat instruction is using OP's mode;
5902 for splat_val < 0, the splat and the add should use the same mode. */
5903 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5904 && (splat_val >= 0 || (step == 1 && copies == 1)))
5907 /* Also check if are loading up the most significant bit which can be done by
5908 loading up -1 and shifting the value left by -1. */
5909 else if (EASY_VECTOR_MSB (splat_val, inner))
5915 /* Check if VAL is present in every STEP-th element, and the
5916 other elements are filled with its most significant bit. */
5917 for (i = 1; i < nunits; ++i)
5919 HOST_WIDE_INT desired_val;
5920 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5921 if ((i & (step - 1)) == 0)
5924 desired_val = msb_val;
5926 if (desired_val != const_vector_elt_as_int (op, elt))
5933 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
5934 instruction, filling in the bottom elements with 0 or -1.
5936 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5937 for the number of zeroes to shift in, or negative for the number of 0xff
5940 OP is a CONST_VECTOR. */
/* NOTE(review): braces, some declarations and several `return 0` lines
   are elided in this listing.  */
5943 vspltis_shifted (rtx op)
5945 machine_mode mode = GET_MODE (op);
5946 machine_mode inner = GET_MODE_INNER (mode);
/* Only byte/halfword/word element vectors are handled.  */
5954 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5957 /* We need to create pseudo registers to do the shift, so don't recognize
5958 shift vector constants after reload. */
5959 if (!can_create_pseudo_p ())
5962 nunits = GET_MODE_NUNITS (mode);
5963 mask = GET_MODE_MASK (inner);
/* The splat candidate is the FIRST element in memory order here
   (opposite end from vspltis_constant).  */
5965 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5967 /* Check if the value can really be the operand of a vspltis[bhw]. */
5968 if (EASY_VECTOR_15 (val))
5971 /* Also check if we are loading up the most significant bit which can be done
5972 by loading up -1 and shifting the value left by -1. */
5973 else if (EASY_VECTOR_MSB (val, inner))
5979 /* Check if VAL is present in every STEP-th element until we find elements
5980 that are 0 or all 1 bits. */
5981 for (i = 1; i < nunits; ++i)
5983 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5984 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5986 /* If the value isn't the splat value, check for the remaining elements
/* Tail of zeros: positive byte count of zeros shifted in.  */
5992 for (j = i+1; j < nunits; ++j)
5994 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5995 if (const_vector_elt_as_int (op, elt2) != 0)
5999 return (nunits - i) * GET_MODE_SIZE (inner);
/* Tail of all-ones: negative byte count of 0xff shifted in.  */
6002 else if ((elt_val & mask) == mask)
6004 for (j = i+1; j < nunits; ++j)
6006 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6007 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6011 return -((nunits - i) * GET_MODE_SIZE (inner));
6019 /* If all elements are equal, we don't need to do VLSDOI. */
6024 /* Return true if OP is of the given MODE and can be synthesized
6025 with a vspltisb, vspltish or vspltisw. */
/* NOTE(review): braces, `return` lines and the STEP/COPIES updates
   between the three vspltis_constant attempts are elided in this
   listing.  */
6028 easy_altivec_constant (rtx op, machine_mode mode)
6030 unsigned step, copies;
6032 if (mode == VOIDmode)
6033 mode = GET_MODE (op);
6034 else if (mode != GET_MODE (op))
6037 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6039 if (mode == V2DFmode)
6040 return zero_constant (op, mode);
6042 else if (mode == V2DImode)
6044 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6045 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6048 if (zero_constant (op, mode))
6051 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6052 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6058 /* V1TImode is a special container for TImode. Ignore for now. */
6059 else if (mode == V1TImode)
6062 /* Start with a vspltisw. */
6063 step = GET_MODE_NUNITS (mode) / 4;
6066 if (vspltis_constant (op, step, copies))
6069 /* Then try with a vspltish. */
6075 if (vspltis_constant (op, step, copies))
6078 /* And finally a vspltisb. */
6084 if (vspltis_constant (op, step, copies))
/* Last resort: constants reachable via splat + VSLDOI shift.  */
6087 if (vspltis_shifted (op) != 0)
6093 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6094 result is OP. Abort if it is not possible. */
/* NOTE(review): the STEP/COPIES adjustments between attempts and the
   final gcc_unreachable are elided in this listing.  Mirrors the probe
   order of easy_altivec_constant: word, then halfword, then byte.  */
6097 gen_easy_altivec_constant (rtx op)
6099 machine_mode mode = GET_MODE (op);
6100 int nunits = GET_MODE_NUNITS (mode);
/* Element at the end in memory order — same choice as vspltis_constant.  */
6101 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6102 unsigned step = nunits / 4;
6103 unsigned copies = 1;
6105 /* Start with a vspltisw. */
6106 if (vspltis_constant (op, step, copies))
6107 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6109 /* Then try with a vspltish. */
6115 if (vspltis_constant (op, step, copies))
6116 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6118 /* And finally a vspltisb. */
6124 if (vspltis_constant (op, step, copies))
6125 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6130 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6131 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6133 Return the number of instructions needed (1 or 2) into the address pointed
6136 Return the constant that is being split via CONSTANT_PTR. */
/* NOTE(review): parameter lines for MODE and the two out-pointers, all
   `return false/true` lines and the *num_insns_ptr stores are elided in
   this listing.  */
6139 xxspltib_constant_p (rtx op,
6144 size_t nunits = GET_MODE_NUNITS (mode);
6146 HOST_WIDE_INT value;
6149 /* Set the returned values to out of bound values. */
6150 *num_insns_ptr = -1;
6151 *constant_ptr = 256;
/* XXSPLTIB is an ISA 3.0 instruction.  */
6153 if (!TARGET_P9_VECTOR)
6156 if (mode == VOIDmode)
6157 mode = GET_MODE (op);
6159 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6162 /* Handle (vec_duplicate <constant>). */
6163 if (GET_CODE (op) == VEC_DUPLICATE)
6165 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6166 && mode != V2DImode)
6169 element = XEXP (op, 0);
6170 if (!CONST_INT_P (element))
/* The splatted byte is sign-extended, so only [-128, 127] works.  */
6173 value = INTVAL (element);
6174 if (!IN_RANGE (value, -128, 127))
6178 /* Handle (const_vector [...]). */
6179 else if (GET_CODE (op) == CONST_VECTOR)
6181 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6182 && mode != V2DImode)
6185 element = CONST_VECTOR_ELT (op, 0);
6186 if (!CONST_INT_P (element))
6189 value = INTVAL (element);
6190 if (!IN_RANGE (value, -128, 127))
/* All elements must be identical CONST_INTs for a splat.  */
6193 for (i = 1; i < nunits; i++)
6195 element = CONST_VECTOR_ELT (op, i);
6196 if (!CONST_INT_P (element))
6199 if (value != INTVAL (element))
6204 /* Handle integer constants being loaded into the upper part of the VSX
6205 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6206 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6207 else if (CONST_INT_P (op))
6209 if (!SCALAR_INT_MODE_P (mode))
6212 value = INTVAL (op);
6213 if (!IN_RANGE (value, -128, 127))
6216 if (!IN_RANGE (value, -1, 0))
6218 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6221 if (EASY_VECTOR_15 (value))
6229 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6230 sign extend. Special case 0/-1 to allow getting any VSX register instead
6231 of an Altivec register. */
6232 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6233 && EASY_VECTOR_15 (value))
6236 /* Return # of instructions and the constant byte for XXSPLTIB. */
6237 if (mode == V16QImode)
6240 else if (IN_RANGE (value, -1, 0))
6246 *constant_ptr = (int) value;
/* Return the assembler template for moving a vector constant
   (operands[1]) into vector register operands[0].  */
/* NOTE(review): return type, braces, several declarations (dest/vec/
   mode/shift) and the VSLDOI emission path after line 6319 are elided
   in this listing.  */
6251 output_vec_const_move (rtx *operands)
6259 mode = GET_MODE (dest);
6263 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6264 int xxspltib_value = 256;
/* Prefer the cheapest encoding of all-zeros for the target ISA.  */
6267 if (zero_constant (vec, mode))
6269 if (TARGET_P9_VECTOR)
6270 return "xxspltib %x0,0";
6272 else if (dest_vmx_p)
6273 return "vspltisw %0,0";
6276 return "xxlxor %x0,%x0,%x0";
/* Likewise for all-ones.  */
6279 if (all_ones_constant (vec, mode))
6281 if (TARGET_P9_VECTOR)
6282 return "xxspltib %x0,255";
6284 else if (dest_vmx_p)
6285 return "vspltisw %0,-1";
6287 else if (TARGET_P8_VECTOR)
6288 return "xxlorc %x0,%x0,%x0";
6294 if (TARGET_P9_VECTOR
6295 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6299 operands[2] = GEN_INT (xxspltib_value & 0xff);
6300 return "xxspltib %x0,%2";
/* Remaining splat forms need an Altivec destination register.  */
6311 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6312 if (zero_constant (vec, mode))
6313 return "vspltisw %0,0";
6315 if (all_ones_constant (vec, mode))
6316 return "vspltisw %0,-1";
6318 /* Do we need to construct a value using VSLDOI? */
6319 shift = vspltis_shifted (vec);
6323 splat_vec = gen_easy_altivec_constant (vec);
6324 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6325 operands[1] = XEXP (splat_vec, 0);
6326 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
/* Select the splat width from the mode gen_easy_altivec_constant chose.  */
6329 switch (GET_MODE (splat_vec))
6332 return "vspltisw %0,%1";
6335 return "vspltish %0,%1";
6338 return "vspltisb %0,%1";
6348 /* Initialize vector TARGET to VALS. */
/* NOTE(review): braces, several `return` lines and some declarations
   are elided in this numbered listing; only comments have been added.
   Strategy overview (each special case returns early):
     1. all-constant vectors: zero / splat-immediate / constant pool;
     2. V2DF-V2DI via xxpermdi or lxvdsx;
     3. V4SI splat with direct move or ISA 3.0;
     4. V4SF via double-precision intermediates;
     5. V16QI/V8HI splats with direct move;
     6. small-element splats through a stack temp + lve + vec_duplicate;
     7. one variable element: init constants then overwrite;
     8. fallback: build the vector in memory and load it whole.  */
6351 rs6000_expand_vector_init (rtx target, rtx vals)
6353 machine_mode mode = GET_MODE (target);
6354 machine_mode inner_mode = GET_MODE_INNER (mode);
6355 int n_elts = GET_MODE_NUNITS (mode);
6356 int n_var = 0, one_var = -1;
6357 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count variable elements, remember the
   index of the (single) variable one, detect all-zero and all-equal.  */
6361 for (i = 0; i < n_elts; ++i)
6363 x = XVECEXP (vals, 0, i);
6364 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6365 ++n_var, one_var = i;
6366 else if (x != CONST0_RTX (inner_mode))
6367 all_const_zero = false;
6369 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6375 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6376 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6377 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6379 /* Zero register. */
6380 emit_move_insn (target, CONST0_RTX (mode));
6383 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6385 /* Splat immediate. */
6386 emit_insn (gen_rtx_SET (target, const_vec))
6391 /* Load from constant pool. */
6392 emit_move_insn (target, const_vec);
6397 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6398 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6402 size_t num_elements = all_same ? 1 : 2;
6403 for (i = 0; i < num_elements; i++)
6405 op[i] = XVECEXP (vals, 0, i);
6406 /* Just in case there is a SUBREG with a smaller mode, do a
6408 if (GET_MODE (op[i]) != inner_mode)
6410 rtx tmp = gen_reg_rtx (inner_mode);
6411 convert_move (tmp, op[i], 0);
6414 /* Allow load with splat double word. */
6415 else if (MEM_P (op[i]))
6418 op[i] = force_reg (inner_mode, op[i]);
6420 else if (!REG_P (op[i]))
6421 op[i] = force_reg (inner_mode, op[i]);
6426 if (mode == V2DFmode)
6427 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6429 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6433 if (mode == V2DFmode)
6434 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6436 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6441 /* Special case initializing vector int if we are on 64-bit systems with
6442 direct move or we have the ISA 3.0 instructions. */
6443 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6444 && TARGET_DIRECT_MOVE_64BIT)
6448 rtx element0 = XVECEXP (vals, 0, 0);
6449 if (MEM_P (element0))
6450 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6452 element0 = force_reg (SImode, element0);
6454 if (TARGET_P9_VECTOR)
6455 emit_insn (gen_vsx_splat_v4si (target, element0));
/* Pre-ISA-3.0: zero-extend to DImode and splat from there.  */
6458 rtx tmp = gen_reg_rtx (DImode);
6459 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6460 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
/* Non-splat V4SI: move all four elements into registers and use the
   vsx_init_v4si expander.  */
6469 for (i = 0; i < 4; i++)
6470 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6472 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6473 elements[2], elements[3]));
6478 /* With single precision floating point on VSX, know that internally single
6479 precision is actually represented as a double, and either make 2 V2DF
6480 vectors, and convert these vectors to single precision, or do one
6481 conversion, and splat the result to the other elements. */
6482 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6486 rtx element0 = XVECEXP (vals, 0, 0);
6488 if (TARGET_P9_VECTOR)
6490 if (MEM_P (element0))
6491 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6493 emit_insn (gen_vsx_splat_v4sf (target, element0));
/* Pre-ISA-3.0 splat: convert the scalar, then xxspltw.  */
6498 rtx freg = gen_reg_rtx (V4SFmode);
6499 rtx sreg = force_reg (SFmode, element0);
6500 rtx cvt = (TARGET_XSCVDPSPN
6501 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6502 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6505 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
/* Non-splat V4SF: pair the scalars into V2DF vectors, convert to
   single precision, then interleave.  */
6511 rtx dbl_even = gen_reg_rtx (V2DFmode);
6512 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6513 rtx flt_even = gen_reg_rtx (V4SFmode);
6514 rtx flt_odd = gen_reg_rtx (V4SFmode);
6515 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6516 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6517 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6518 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6520 /* Use VMRGEW if we can instead of doing a permute. */
6521 if (TARGET_P8_VECTOR)
6523 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6524 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6525 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6526 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6527 if (BYTES_BIG_ENDIAN)
6528 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6530 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6534 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6535 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6536 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6537 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6538 rs6000_expand_extract_even (target, flt_even, flt_odd);
6544 /* Special case initializing vector short/char that are splats if we are on
6545 64-bit systems with direct move. */
6546 if (all_same && TARGET_DIRECT_MOVE_64BIT
6547 && (mode == V16QImode || mode == V8HImode))
6549 rtx op0 = XVECEXP (vals, 0, 0);
6550 rtx di_tmp = gen_reg_rtx (DImode);
6553 op0 = force_reg (GET_MODE_INNER (mode), op0);
6555 if (mode == V16QImode)
6557 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6558 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6562 if (mode == V8HImode)
6564 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6565 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6570 /* Store value to stack temp. Load vector element. Splat. However, splat
6571 of 64-bit items is not supported on Altivec. */
6572 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6574 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6575 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6576 XVECEXP (vals, 0, 0));
6577 x = gen_rtx_UNSPEC (VOIDmode,
6578 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6579 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6581 gen_rtx_SET (target, mem),
6583 x = gen_rtx_VEC_SELECT (inner_mode, target,
6584 gen_rtx_PARALLEL (VOIDmode,
6585 gen_rtvec (1, const0_rtx)));
6586 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6590 /* One field is non-constant. Load constant then overwrite
6594 rtx copy = copy_rtx (vals);
6596 /* Load constant part of vector, substitute neighboring value for
6598 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6599 rs6000_expand_vector_init (target, copy);
6601 /* Insert variable. */
6602 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6606 /* Construct the vector in memory one field at a time
6607 and load the whole vector. */
6608 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6609 for (i = 0; i < n_elts; i++)
6610 emit_move_insn (adjust_address_nv (mem, inner_mode,
6611 i * GET_MODE_SIZE (inner_mode)),
6612 XVECEXP (vals, 0, i));
6613 emit_move_insn (target, mem);
6616 /* Set field ELT of TARGET to VAL. */
/* NOTE(review): braces, some declarations (mem/x/mask/i) and the
   emit/return lines after the VSX insn selection are elided in this
   listing.  The fallback path builds a vperm: load VAL into one
   register, build a byte-selector mask, and permute it into TARGET.  */
6619 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6621 machine_mode mode = GET_MODE (target);
6622 machine_mode inner_mode = GET_MODE_INNER (mode);
6623 rtx reg = gen_reg_rtx (mode);
6625 int width = GET_MODE_SIZE (inner_mode);
6628 val = force_reg (GET_MODE (val), val);
/* Fast path: modes with a direct VSX set pattern.  */
6630 if (VECTOR_MEM_VSX_P (mode))
6632 rtx insn = NULL_RTX;
6633 rtx elt_rtx = GEN_INT (elt);
6635 if (mode == V2DFmode)
6636 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6638 else if (mode == V2DImode)
6639 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6641 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6643 if (mode == V4SImode)
6644 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6645 else if (mode == V8HImode)
6646 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6647 else if (mode == V16QImode)
6648 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6649 else if (mode == V4SFmode)
6650 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6660 /* Simplify setting single element vectors like V1TImode. */
6661 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6663 emit_move_insn (target, gen_lowpart (mode, val));
6667 /* Load single variable value. */
6668 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6669 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6670 x = gen_rtx_UNSPEC (VOIDmode,
6671 gen_rtvec (1, const0_rtx), UNSPEC_LVE)
6672 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6674 gen_rtx_SET (reg, mem),
6677 /* Linear sequence. */
6678 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6679 for (i = 0; i < 16; ++i)
6680 XVECEXP (mask, 0, i) = GEN_INT (i);
6682 /* Set permute mask to insert element into target. */
6683 for (i = 0; i < width; ++i)
6684 XVECEXP (mask, 0, elt*width + i)
6685 = GEN_INT (i + 0x10);
6686 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6688 if (BYTES_BIG_ENDIAN)
6689 x = gen_rtx_UNSPEC (mode,
6690 gen_rtvec (3, target, reg,
6691 force_reg (V16QImode, x)),
/* Little-endian: vpermr on ISA 3.0 takes the selector directly.  */
6695 if (TARGET_P9_VECTOR)
6696 x = gen_rtx_UNSPEC (mode,
6697 gen_rtvec (3, reg, target,
6698 force_reg (V16QImode, x)),
6702 /* Invert selector. We prefer to generate VNAND on P8 so
6703 that future fusion opportunities can kick in, but must
6704 generate VNOR elsewhere. */
6705 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6706 rtx iorx = (TARGET_P8_VECTOR
6707 ? gen_rtx_IOR (V16QImode, notx, notx)
6708 : gen_rtx_AND (V16QImode, notx, notx));
6709 rtx tmp = gen_reg_rtx (V16QImode);
6710 emit_insn (gen_rtx_SET (tmp, iorx));
6712 /* Permute with operands reversed and adjusted selector. */
6713 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6718 emit_insn (gen_rtx_SET (target, x));
6721 /* Extract field ELT from VEC into TARGET. */
/* NOTE(review): elided listing -- the switch/case scaffolding around the
   gen_vsx_extract_* calls is missing; code kept byte-identical.  */
6724 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6726 machine_mode mode = GET_MODE (vec);
6727 machine_mode inner_mode = GET_MODE_INNER (mode);
/* Constant element number with VSX: use the direct extract patterns.  */
6730 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6737 emit_move_insn (target, gen_lowpart (TImode, vec));
6740 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6743 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6746 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
/* The small-element extracts additionally require direct GPR<->VSR moves.  */
6749 if (TARGET_DIRECT_MOVE_64BIT)
6751 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6757 if (TARGET_DIRECT_MOVE_64BIT)
6759 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6765 if (TARGET_DIRECT_MOVE_64BIT)
6767 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
/* Variable element number: normalize it to a DImode register, then use
   the *_var extract patterns.  */
6773 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6774 && TARGET_DIRECT_MOVE_64BIT)
6776 if (GET_MODE (elt) != DImode)
6778 rtx tmp = gen_reg_rtx (DImode);
6779 convert_move (tmp, elt, 0);
6782 else if (!REG_P (elt))
6783 elt = force_reg (DImode, elt);
6788 emit_move_insn (target, gen_lowpart (TImode, vec));
6792 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6796 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6800 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6804 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6808 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6812 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
/* Fallback: spill the vector to the stack and load the element back.  */
6820 /* Allocate mode-sized buffer. */
6821 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6823 emit_move_insn (mem, vec);
6824 if (CONST_INT_P (elt))
6826 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6828 /* Add offset to field within buffer matching vector element. */
6829 mem = adjust_address_nv (mem, inner_mode,
6830 modulo_elt * GET_MODE_SIZE (inner_mode));
6831 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
/* Variable element with no VSX path: compute the byte address as
   (elt & (nunits-1)) * element_size and load from there.  */
6835 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6836 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6837 rtx new_addr = gen_reg_rtx (Pmode);
6839 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6841 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6842 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6843 new_addr = change_address (mem, inner_mode, new_addr);
6844 emit_move_insn (target, new_addr);
6848 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6849 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6850 temporary (BASE_TMP) to fixup the address. Return the new memory address
6851 that is valid for reads or writes to a given register (SCALAR_REG). */
/* NOTE(review): elided listing -- the MEM and BASE_TMP parameters and some
   branch scaffolding are missing from this view; code kept byte-identical.  */
6854 rs6000_adjust_vec_address (rtx scalar_reg,
6858 machine_mode scalar_mode)
6860 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6861 rtx addr = XEXP (mem, 0);
6866 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6867 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6869 /* Calculate what we need to add to the address to get the element
/* Constant element: offset is simply element * element size.  */
6871 if (CONST_INT_P (element))
6872 element_offset = GEN_INT (INTVAL (element) * scalar_size);
/* Variable element: shift left by log2 (element size); element sizes are
   required to be powers of two.  */
6875 int byte_shift = exact_log2 (scalar_size);
6876 gcc_assert (byte_shift >= 0);
6878 if (byte_shift == 0)
6879 element_offset = element;
6883 if (TARGET_POWERPC64)
6884 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6886 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6888 element_offset = base_tmp;
6892 /* Create the new address pointing to the element within the vector. If we
6893 are adding 0, we don't have to change the address. */
6894 if (element_offset == const0_rtx)
6897 /* A simple indirect address can be converted into a reg + offset
6899 else if (REG_P (addr) || SUBREG_P (addr))
6900 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6902 /* Optimize D-FORM addresses with constant offset with a constant element, to
6903 include the element offset in the address directly. */
6904 else if (GET_CODE (addr) == PLUS)
6906 rtx op0 = XEXP (addr, 0);
6907 rtx op1 = XEXP (addr, 1);
6910 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6911 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6913 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6914 rtx offset_rtx = GEN_INT (offset);
/* Keep a D-form (reg + 16-bit offset) address when the combined offset
   fits and satisfies the DS-form alignment for 8-byte scalars.  */
6916 if (IN_RANGE (offset, -32768, 32767)
6917 && (scalar_size < 8 || (offset & 0x3) == 0))
6918 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6921 emit_move_insn (base_tmp, offset_rtx);
6922 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6927 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6928 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6930 /* Note, ADDI requires the register being added to be a base
6931 register. If the register was R0, load it up into the temporary
/* NOTE(review): the conditions guarding these three add sequences are
   partially elided; they appear to pick an ADD operand order that avoids
   using GPR0 as a base -- confirm against the full source.  */
6934 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6936 insn = gen_add3_insn (base_tmp, op1, element_offset);
6937 gcc_assert (insn != NULL_RTX);
6942 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6944 insn = gen_add3_insn (base_tmp, element_offset, op1);
6945 gcc_assert (insn != NULL_RTX);
6951 emit_move_insn (base_tmp, op1);
6952 emit_insn (gen_add2_insn (base_tmp, element_offset));
6955 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
/* Any other address shape: materialize it in BASE_TMP first.  */
6961 emit_move_insn (base_tmp, addr);
6962 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6965 /* If we have a PLUS, we need to see whether the particular register class
6966 allows for D-FORM or X-FORM addressing. */
6967 if (GET_CODE (new_addr) == PLUS)
6969 rtx op1 = XEXP (new_addr, 1);
6970 addr_mask_type addr_mask;
6971 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6973 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6974 if (INT_REGNO_P (scalar_regno))
6975 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6977 else if (FP_REGNO_P (scalar_regno))
6978 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6980 else if (ALTIVEC_REGNO_P (scalar_regno))
6981 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
/* reg+reg needs indexed (X-form) support; reg+const needs offset
   (D-form) support for the target register class.  */
6986 if (REG_P (op1) || SUBREG_P (op1))
6987 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6989 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6992 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6993 valid_addr_p = true;
6996 valid_addr_p = false;
/* Invalid address forms are forced into BASE_TMP (plain indirect).  */
7000 emit_move_insn (base_tmp, new_addr);
7001 new_addr = base_tmp;
7004 return change_address (mem, scalar_mode, new_addr);
7007 /* Split a variable vec_extract operation into the component instructions. */
/* NOTE(review): elided listing -- the TMP_ALTIVEC parameter, MEM_P branch
   header and several closing braces are missing; code kept byte-identical.  */
7010 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7013 machine_mode mode = GET_MODE (src);
7014 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7015 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7016 int byte_shift = exact_log2 (scalar_size);
7018 gcc_assert (byte_shift >= 0);
7020 /* If we are given a memory address, optimize to load just the element. We
7021 don't have to adjust the vector element number on little endian
7025 int num_elements = GET_MODE_NUNITS (mode);
7026 rtx num_ele_m1 = GEN_INT (num_elements - 1);
/* Mask the element number into range, then load only that element.  */
7028 emit_insn (gen_anddi3 (element, element, num_ele_m1));
7029 gcc_assert (REG_P (tmp_gpr));
7030 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7031 tmp_gpr, scalar_mode));
7035 else if (REG_P (src) || SUBREG_P (src))
7037 int num_elements = GET_MODE_NUNITS (mode);
7038 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7039 int bit_shift = 7 - exact_log2 (num_elements);
7041 unsigned int dest_regno = reg_or_subregno (dest);
7042 unsigned int src_regno = reg_or_subregno (src);
7043 unsigned int element_regno = reg_or_subregno (element);
7045 gcc_assert (REG_P (tmp_gpr));
7047 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7048 a general purpose register. */
7049 if (TARGET_P9_VECTOR
7050 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7051 && INT_REGNO_P (dest_regno)
7052 && ALTIVEC_REGNO_P (src_regno)
7053 && INT_REGNO_P (element_regno))
7055 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7056 rtx element_si = gen_rtx_REG (SImode, element_regno);
7058 if (mode == V16QImode)
7059 emit_insn (BYTES_BIG_ENDIAN
7060 ? gen_vextublx (dest_si, element_si, src)
7061 : gen_vextubrx (dest_si, element_si, src));
7063 else if (mode == V8HImode)
/* VEXTUH takes a byte index, so scale the halfword index by 2.  */
7065 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7066 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7067 emit_insn (BYTES_BIG_ENDIAN
7068 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7069 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
/* (elided else arm) VEXTUW takes a byte index; scale word index by 4.  */
7075 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7076 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7077 emit_insn (BYTES_BIG_ENDIAN
7078 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7079 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7086 gcc_assert (REG_P (tmp_altivec));
7088 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7089 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7090 will shift the element into the upper position (adding 3 to convert a
7091 byte shift into a bit shift). */
7092 if (scalar_size == 8)
7094 if (!BYTES_BIG_ENDIAN)
7096 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7102 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7104 emit_insn (gen_rtx_SET (tmp_gpr,
7105 gen_rtx_AND (DImode,
7106 gen_rtx_ASHIFT (DImode,
7113 if (!BYTES_BIG_ENDIAN)
7115 rtx num_ele_m1 = GEN_INT (num_elements - 1);
/* LE ordering: element' = (nunits-1) - (element & (nunits-1)).  */
7117 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7118 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7124 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7127 /* Get the value into the lower byte of the Altivec register where VSLO
7129 if (TARGET_P9_VECTOR)
7130 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7131 else if (can_create_pseudo_p ())
7132 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7135 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7136 emit_move_insn (tmp_di, tmp_gpr);
7137 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7140 /* Do the VSLO to get the value into the final location. */
7144 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7148 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
/* (elided case) V4SF: shift into position, then convert scalar SP->DP.  */
7153 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7154 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7155 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7156 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7159 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
/* (elided case) small integer elements: shift, move to GPR, then shift
   right to drop the bits below the element.  */
7167 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7168 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7169 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7170 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7172 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7173 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7174 GEN_INT (64 - bits_in_element)));
7188 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7189 selects whether the alignment is abi mandated, optional, or
7190 both abi and optional alignment. */
/* NOTE(review): elided listing -- the branches raising vector alignment to
   128 and the final return are missing here; code kept byte-identical.  */
7193 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7195 if (how != align_opt)
7197 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7201 if (how != align_abi)
/* Optional (non-ABI) boost: char arrays get word alignment to help
   block moves.  */
7203 if (TREE_CODE (type) == ARRAY_TYPE
7204 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7206 if (align < BITS_PER_WORD)
7207 align = BITS_PER_WORD;
7214 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7215 instructions simply ignore the low bits; VSX memory instructions
7216 are aligned to 4 or 8 bytes. */
/* True when an unaligned access of MODE at alignment ALIGN should be
   treated as slow: always under -mstrict-align, otherwise only when
   efficient unaligned VSX is unavailable and the access is an
   under-aligned scalar float or vector.  */
7219 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7221 return (STRICT_ALIGNMENT
7222 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7223 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7224 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7225 && (int) align < VECTOR_ALIGN (mode)))));
7228 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
/* NOTE(review): elided listing -- the `warned` flag, return statements and
   braces are missing from this view; code kept byte-identical.  */
7231 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7233 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7235 if (computed != 128)
/* -Wpsabi: warn once that GCC 5 changed aggregate layout for vectors
   whose field alignment differs from 128 bits.  */
7238 if (!warned && warn_psabi)
7241 inform (input_location,
7242 "the layout of aggregates containing vectors with"
7243 " %d-byte alignment has changed in GCC 5",
7244 computed / BITS_PER_UNIT);
7247 /* In current GCC there is no special case. */
7254 /* AIX increases natural record alignment to doubleword if the first
7255 field is an FP double while the FP fields remain word aligned. */
/* NOTE(review): elided listing -- the final return of ALIGN is missing
   from this view; code kept byte-identical.  */
7258 rs6000_special_round_type_align (tree type, unsigned int computed,
7259 unsigned int specified)
7261 unsigned int align = MAX (computed, specified);
7262 tree field = TYPE_FIELDS (type);
7264 /* Skip all non field decls */
7265 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7266 field = DECL_CHAIN (field);
7268 if (field != NULL && field != type)
/* Look through arrays to the element type of the first field.  */
7270 type = TREE_TYPE (field);
7271 while (TREE_CODE (type) == ARRAY_TYPE)
7272 type = TREE_TYPE (type);
7274 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7275 align = MAX (align, 64);
7281 /* Darwin increases record alignment to the natural alignment of
/* NOTE(review): elided listing -- early returns, the do-loop header and
   the final return are missing from this view; code kept byte-identical.  */
7285 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7286 unsigned int specified)
7288 unsigned int align = MAX (computed, specified);
/* Packed records take no extra alignment.  */
7290 if (TYPE_PACKED (type))
7293 /* Find the first field, looking down into aggregates. */
7295 tree field = TYPE_FIELDS (type);
7296 /* Skip all non field decls */
7297 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7298 field = DECL_CHAIN (field);
7301 /* A packed field does not contribute any extra alignment. */
7302 if (DECL_PACKED (field))
7304 type = TREE_TYPE (field);
7305 while (TREE_CODE (type) == ARRAY_TYPE)
7306 type = TREE_TYPE (type);
7307 } while (AGGREGATE_TYPE_P (type));
7309 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7310 align = MAX (align, TYPE_ALIGN (type));
7315 /* Return 1 for an operand in small memory on V.4/eabi. */
/* NOTE(review): elided listing -- several return statements and the
   SYMBOL_REF-only path are missing from this view; code kept
   byte-identical.  */
7318 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7319 machine_mode mode ATTRIBUTE_UNUSED)
/* Small-data addressing only applies to the V.4 ABI with -msdata
   settings other than none/data.  */
7324 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7327 if (DEFAULT_ABI != ABI_V4)
7330 if (SYMBOL_REF_P (op))
/* Otherwise OP must be (const (plus (symbol_ref ...) (const_int ...))).  */
7333 else if (GET_CODE (op) != CONST
7334 || GET_CODE (XEXP (op, 0)) != PLUS
7335 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7336 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7341 rtx sum = XEXP (op, 0);
7342 HOST_WIDE_INT summand;
7344 /* We have to be careful here, because it is the referenced address
7345 that must be 32k from _SDA_BASE_, not just the symbol. */
7346 summand = INTVAL (XEXP (sum, 1));
7347 if (summand < 0 || summand > g_switch_value)
7350 sym_ref = XEXP (sum, 0);
7353 return SYMBOL_REF_SMALL_P (sym_ref);
7359 /* Return true if either operand is a general purpose register. */
/* Used to decide whether a move involves at least one GPR.  */
7362 gpr_or_gpr_p (rtx op0, rtx op1)
7364 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7365 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7368 /* Return true if this is a move direct operation between GPR registers and
7369 floating point/VSX registers. */
/* NOTE(review): elided listing -- the true/false return statements between
   the conditions are missing; code kept byte-identical.  */
7372 direct_move_p (rtx op0, rtx op1)
7374 if (!REG_P (op0) || !REG_P (op1))
7377 if (!TARGET_DIRECT_MOVE)
/* Only hard registers can be classified; one side must be a GPR and the
   other a VSX register (either order).  */
7380 int regno0 = REGNO (op0);
7381 int regno1 = REGNO (op1);
7382 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7385 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7388 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7394 /* Return true if the ADDR is an acceptable address for a quad memory
7395 operation of mode MODE (either LQ/STQ for general purpose registers, or
7396 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7397 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7398 3.0 LXV/STXV instruction. */
/* NOTE(review): the comment mentions GPR_P but the visible signature takes
   STRICT instead -- the listing may predate/postdate the comment; confirm.  */
7401 quad_address_p (rtx addr, machine_mode mode, bool strict)
7405 if (GET_MODE_SIZE (mode) != 16)
7408 if (legitimate_indirect_address_p (addr, strict))
7411 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
/* Otherwise require reg + DQ-aligned constant offset.  */
7414 if (GET_CODE (addr) != PLUS)
7417 op0 = XEXP (addr, 0);
7418 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7421 op1 = XEXP (addr, 1);
7422 if (!CONST_INT_P (op1))
7425 return quad_address_offset_p (INTVAL (op1));
7428 /* Return true if this is a load or store quad operation. This function does
7429 not handle the atomic quad memory instructions. */
7432 quad_load_store_p (rtx op0, rtx op1)
/* Quad memory must be enabled; then check reg<-mem (load, with no overlap
   between the destination pair and the address) or mem<-reg (store).  */
7436 if (!TARGET_QUAD_MEMORY)
7439 else if (REG_P (op0) && MEM_P (op1))
7440 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7441 && quad_memory_operand (op1, GET_MODE (op1))
7442 && !reg_overlap_mentioned_p (op0, op1));
7444 else if (MEM_P (op0) && REG_P (op1))
7445 ret = (quad_memory_operand (op0, GET_MODE (op0))
7446 && quad_int_reg_operand (op1, GET_MODE (op1)));
/* Under -mdebug=addr, trace the decision.  */
7451 if (TARGET_DEBUG_ADDR)
7453 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7454 ret ? "true" : "false");
7455 debug_rtx (gen_rtx_SET (op0, op1));
7461 /* Given an address, return a constant offset term if one exists. */
/* NOTE(review): elided listing -- the returns/operand extraction after each
   code test are missing from this view; code kept byte-identical.  */
7464 address_offset (rtx op)
7466 if (GET_CODE (op) == PRE_INC
7467 || GET_CODE (op) == PRE_DEC)
7469 else if (GET_CODE (op) == PRE_MODIFY
7470 || GET_CODE (op) == LO_SUM)
7473 if (GET_CODE (op) == CONST)
7476 if (GET_CODE (op) == PLUS)
7479 if (CONST_INT_P (op))
7485 /* Return true if the MEM operand is a memory operand suitable for use
7486 with a (full width, possibly multiple) gpr load/store. On
7487 powerpc64 this means the offset must be divisible by 4.
7488 Implements 'Y' constraint.
7490 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7491 a constraint function we know the operand has satisfied a suitable
7494 Offsetting a lo_sum should not be allowed, except where we know by
7495 alignment that a 32k boundary is not crossed. Note that by
7496 "offsetting" here we mean a further offset to access parts of the
7497 MEM. It's fine to have a lo_sum where the inner address is offset
7498 from a sym, since the same sym+offset will appear in the high part
7499 of the address calculation. */
7502 mem_operand_gpr (rtx op, machine_mode mode)
7504 unsigned HOST_WIDE_INT offset;
7506 rtx addr = XEXP (op, 0);
7508 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7510 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7511 && mode_supports_pre_incdec_p (mode)
7512 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7515 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7516 if (!rs6000_offsettable_memref_p (op, mode, false))
7519 op = address_offset (addr);
7523 offset = INTVAL (op);
/* 64-bit LD/STD are DS-form: low 2 offset bits must be zero.  */
7524 if (TARGET_POWERPC64 && (offset & 3) != 0)
/* EXTRA accounts for multi-word accesses: the last word accessed is at
   offset + GET_MODE_SIZE (mode) - UNITS_PER_WORD.  */
7527 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7531 if (GET_CODE (addr) == LO_SUM)
7532 /* For lo_sum addresses, we must allow any offset except one that
7533 causes a wrap, so test only the low 16 bits. */
7534 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
/* Unsigned range trick: true iff offset is in [-0x8000, 0x8000 - extra).  */
7536 return offset + 0x8000 < 0x10000u - extra;
7539 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7540 enforce an offset divisible by 4 even for 32-bit. */
7543 mem_operand_ds_form (rtx op, machine_mode mode)
7545 unsigned HOST_WIDE_INT offset;
7547 rtx addr = XEXP (op, 0);
7549 if (!offsettable_address_p (false, mode, addr))
7552 op = address_offset (addr);
7556 offset = INTVAL (op);
/* DS-form requires the low 2 bits of the offset to be zero regardless
   of -m32/-m64.  */
7557 if ((offset & 3) != 0)
/* EXTRA covers the span of multi-word accesses, as in mem_operand_gpr.  */
7560 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7564 if (GET_CODE (addr) == LO_SUM)
7565 /* For lo_sum addresses, we must allow any offset except one that
7566 causes a wrap, so test only the low 16 bits. */
7567 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7569 return offset + 0x8000 < 0x10000u - extra;
7572 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
/* NOTE(review): elided listing -- the function header, switch statement and
   default return are missing from this view; code kept byte-identical.
   Returns whether MODE allows reg + constant-offset addressing.  */
7575 reg_offset_addressing_ok_p (machine_mode mode)
7589 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7590 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7591 a vector mode, if we want to use the VSX registers to move it around,
7592 we need to restrict ourselves to reg+reg addressing. Similarly for
7593 IEEE 128-bit floating point that is passed in a single vector
7595 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7596 return mode_supports_dq_form (mode);
7600 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7601 addressing for the LFIWZX and STFIWX instructions. */
7602 if (TARGET_NO_SDMODE_STACK)
/* True if OP is (or is based on) one of the virtual stack registers,
   i.e. a stack address not yet instantiated to a hard frame pointer.
   NOTE(review): the REG_P test and the default-return arm are elided
   from this listing; code kept byte-identical.  */
7614 virtual_stack_registers_memory_p (rtx op)
7619 regnum = REGNO (op);
7621 else if (GET_CODE (op) == PLUS
7622 && REG_P (XEXP (op, 0))
7623 && CONST_INT_P (XEXP (op, 1)))
7624 regnum = REGNO (XEXP (op, 0));
7629 return (regnum >= FIRST_VIRTUAL_REGISTER
7630 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7633 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7634 is known to not straddle a 32k boundary. This function is used
7635 to determine whether -mcmodel=medium code can use TOC pointer
7636 relative addressing for OP. This means the alignment of the TOC
7637 pointer must also be taken into account, and unfortunately that is
7640 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7641 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
/* NOTE(review): elided listing -- the #endif, function type, several early
   returns and the BLKmode branch structure are missing; code kept
   byte-identical.  */
7645 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7649 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7651 if (!SYMBOL_REF_P (op))
7654 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7656 if (mode_supports_dq_form (mode))
7659 dsize = GET_MODE_SIZE (mode);
7660 decl = SYMBOL_REF_DECL (op);
7666 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7667 replacing memory addresses with an anchor plus offset. We
7668 could find the decl by rummaging around in the block->objects
7669 VEC for the given offset but that seems like too much work. */
7670 dalign = BITS_PER_UNIT;
7671 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7672 && SYMBOL_REF_ANCHOR_P (op)
7673 && SYMBOL_REF_BLOCK (op) != NULL)
7675 struct object_block *block = SYMBOL_REF_BLOCK (op);
7677 dalign = block->alignment;
7678 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7680 else if (CONSTANT_POOL_ADDRESS_P (op))
7682 /* It would be nice to have get_pool_align().. */
7683 machine_mode cmode = get_pool_mode (op);
7685 dalign = GET_MODE_ALIGNMENT (cmode);
7688 else if (DECL_P (decl))
7690 dalign = DECL_ALIGN (decl);
7694 /* Allow BLKmode when the entire object is known to not
7695 cross a 32k boundary. */
7696 if (!DECL_SIZE_UNIT (decl))
7699 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7702 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
/* Clamp usable alignment to what the TOC pointer itself guarantees.  */
7706 dalign /= BITS_PER_UNIT;
7707 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7708 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7709 return dalign >= dsize;
7715 /* Find how many bits of the alignment we know for this access. */
7716 dalign /= BITS_PER_UNIT;
7717 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7718 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
/* Lowest set bit of OFFSET bounds the alignment of the access itself.  */
7720 lsb = offset & -offset;
7724 return dalign >= dsize;
/* True if OP refers to a constant-pool symbol whose entry would be
   placed in the TOC (special pool entry for Pmode).  */
7728 constant_pool_expr_p (rtx op)
7732 split_const (op, &base, &offset);
7733 return (SYMBOL_REF_P (base)
7734 && CONSTANT_POOL_ADDRESS_P (base)
7735 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7738 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7739 use that as the register to put the HIGH value into if register allocation
/* NOTE(review): elided listing -- the small-model return and part of the
   largetoc handling are missing from this view; code kept byte-identical.  */
7743 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7745 rtx tocrel, tocreg, hi;
7747 if (TARGET_DEBUG_ADDR)
7749 if (SYMBOL_REF_P (symbol))
7750 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7754 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7755 GET_RTX_NAME (GET_CODE (symbol)));
/* After reload the TOC register must be marked live explicitly.  */
7760 if (!can_create_pseudo_p ())
7761 df_set_regs_ever_live (TOC_REGISTER, true);
7763 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7764 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7765 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
/* Medium/large code model: split into HIGH + LO_SUM.  */
7768 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7769 if (largetoc_reg != NULL)
7771 emit_move_insn (largetoc_reg, hi);
7774 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7777 /* These are only used to pass through from print_operand/print_operand_address
7778 to rs6000_output_addr_const_extra over the intervening function
7779 output_addr_const which is not target code. */
/* File-scope pass-through state; not meaningful outside that call chain.  */
7780 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7782 /* Return true if OP is a toc pointer relative address (the output
7783 of create_TOC_reference). If STRICT, do not match non-split
7784 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7785 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7786 TOCREL_OFFSET_RET respectively. */
/* NOTE(review): elided listing -- the strict-mode early return and the
   LO_SUM unwrapping statements are missing; code kept byte-identical.  */
7789 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7790 const_rtx *tocrel_offset_ret)
7795 if (TARGET_CMODEL != CMODEL_SMALL)
7797 /* When strict ensure we have everything tidy. */
7799 && !(GET_CODE (op) == LO_SUM
7800 && REG_P (XEXP (op, 0))
7801 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7804 /* When not strict, allow non-split TOC addresses and also allow
7805 (lo_sum (high ..)) TOC addresses created during reload. */
7806 if (GET_CODE (op) == LO_SUM)
7810 const_rtx tocrel_base = op;
7811 const_rtx tocrel_offset = const0_rtx;
/* Peel an optional (plus base const_int) wrapper into base + offset.  */
7813 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7815 tocrel_base = XEXP (op, 0);
7816 tocrel_offset = XEXP (op, 1);
7819 if (tocrel_base_ret)
7820 *tocrel_base_ret = tocrel_base;
7821 if (tocrel_offset_ret)
7822 *tocrel_offset_ret = tocrel_offset;
/* The base must be the UNSPEC_TOCREL produced by create_TOC_reference,
   with the TOC register as its second operand.  */
7824 return (GET_CODE (tocrel_base) == UNSPEC
7825 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7826 && REG_P (XVECEXP (tocrel_base, 0, 1))
7827 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7830 /* Return true if X is a constant pool address, and also for cmodel=medium
7831 if X is a toc-relative address known to be offsettable within MODE. */
7834 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
/* For CMODEL_MEDIUM the symbol must either live in the constant pool or
   be provably offsettable by alignment (no 32k-boundary straddle).  */
7837 const_rtx tocrel_base, tocrel_offset;
7838 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7839 && (TARGET_CMODEL != CMODEL_MEDIUM
7840 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7842 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7843 INTVAL (tocrel_offset), mode)));
/* True if X is a small-data reference usable under the V.4 ABI:
   no PIC, no TOC, and X is a symbol or const within the -G limit.  */
7847 legitimate_small_data_p (machine_mode mode, rtx x)
7849 return (DEFAULT_ABI == ABI_V4
7850 && !flag_pic && !TARGET_TOC
7851 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7852 && small_data_operand (x, mode));
/* True if X is a legitimate reg + const-offset address for MODE.
   NOTE(review): elided listing -- the mode switch (cases setting EXTRA)
   and several returns are missing from this view; code kept
   byte-identical.  */
7856 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7857 bool strict, bool worst_case)
7859 unsigned HOST_WIDE_INT offset;
7862 if (GET_CODE (x) != PLUS)
7864 if (!REG_P (XEXP (x, 0)))
7866 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
/* DQ-form modes have their own, stricter rule.  */
7868 if (mode_supports_dq_form (mode))
7869 return quad_address_p (x, mode, strict);
7870 if (!reg_offset_addressing_ok_p (mode))
7871 return virtual_stack_registers_memory_p (x);
7872 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7874 if (!CONST_INT_P (XEXP (x, 1)))
7877 offset = INTVAL (XEXP (x, 1));
7884 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7886 if (VECTOR_MEM_VSX_P (mode))
7891 if (!TARGET_POWERPC64)
7893 else if (offset & 3)
7906 if (!TARGET_POWERPC64)
7908 else if (offset & 3)
7917 return offset < 0x10000 - extra;
/* True if X is a legitimate reg + reg (X-form) address: either operand
   may serve as base provided the other is a valid index.  */
7921 legitimate_indexed_address_p (rtx x, int strict)
7925 if (GET_CODE (x) != PLUS)
7931 return (REG_P (op0) && REG_P (op1)
7932 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7933 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7934 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7935 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
/* True when -mavoid-indexed-addresses applies to MODE (i.e. MODE has
   non-indexed load/store forms we prefer to use).  */
7939 avoiding_indexed_address_p (machine_mode mode)
7941 /* Avoid indexed addressing for modes that have non-indexed
7942 load/store instruction forms. */
7943 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
/* True if X is a plain register valid as a base (indirect addressing).  */
7947 legitimate_indirect_address_p (rtx x, int strict)
7949 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
/* True if X is a Darwin PIC lo_sum memory operand of SImode: requires
   -fpic on Mach-O, a valid base register, and a constant low part.
   NOTE(review): the statements unwrapping X between the tests are elided
   in this listing; code kept byte-identical.  */
7953 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7955 if (!TARGET_MACHO || !flag_pic
7956 || mode != SImode || !MEM_P (x))
7960 if (GET_CODE (x) != LO_SUM)
7962 if (!REG_P (XEXP (x, 0)))
7964 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7968 return CONSTANT_P (x);
/* True if X is a legitimate (lo_sum reg const) address for MODE.
   NOTE(review): elided listing -- several returns and the ELF/Darwin
   branch scaffolding are missing from this view; code kept
   byte-identical.  */
7972 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7974 if (GET_CODE (x) != LO_SUM)
7976 if (!REG_P (XEXP (x, 0)))
7978 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7980 /* quad word addresses are restricted, and we can't use LO_SUM. */
7981 if (mode_supports_dq_form (mode))
7985 if (TARGET_ELF || TARGET_MACHO)
7989 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7991 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7992 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7993 recognizes some LO_SUM addresses as valid although this
7994 function says opposite. In most cases, LRA through different
7995 transformations can generate correct code for address reloads.
7996 It cannot manage only some LO_SUM cases. So we need to add
7997 code here saying that some addresses are still valid. */
7998 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7999 && small_toc_ref (x, VOIDmode));
8000 if (TARGET_TOC && ! large_toc_ok)
8002 if (GET_MODE_NUNITS (mode) != 1)
/* Multi-word scalars only allowed when a single FP load handles them.  */
8004 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8005 && !(/* ??? Assume floating point reg based on mode? */
8006 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8009 return CONSTANT_P (x) || large_toc_ok;
8016 /* Try machine-dependent ways of modifying an illegitimate address
8017 to be legitimate. If we find one, return the new, valid address.
8018 This is used from only one place: `memory_address' in explow.c.
8020 OLDX is the address as it was before break_out_memory_refs was
8021 called. In some cases it is useful to look at this to decide what
8024 It is always safe for this function to do nothing. It exists to
8025 recognize opportunities to optimize the output.
8027 On RS/6000, first check for the sum of a register with a constant
8028 integer that is out of range. If so, generate code to add the
8029 constant with the low-order 16 bits masked to the register and force
8030 this result into another register (this can be done with `cau').
8031 Then generate an address of REG+(CONST&0xffff), allowing for the
8032 possibility of bit 16 being a one.
8034 Then check for the sum of a register and something not constant, try to
8035 load the other things into a register and return the sum. */
/* Implements TARGET_LEGITIMIZE_ADDRESS (see the block comment above):
   rewrite X into a form the machine can address for MODE, or force it
   into a register.  NOTE(review): interior lines are elided in this
   listing; the visible branches are commented individually.  */
8038 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
/* DQ-form (quad) addresses and modes without reg+offset addressing get
   the simplest treatment: reduce X to a plain base register.  */
8043   if (!reg_offset_addressing_ok_p (mode)
8044       || mode_supports_dq_form (mode))
8046       if (virtual_stack_registers_memory_p (x))
8049       /* In theory we should not be seeing addresses of the form reg+0,
8050 	 but just in case it is generated, optimize it away.  */
8051       if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8052 	return force_reg (Pmode, XEXP (x, 0));
8054       /* For TImode with load/store quad, restrict addresses to just a single
8055 	 pointer, so it works with both GPRs and VSX registers.  */
8056       /* Make sure both operands are registers.  */
8057       else if (GET_CODE (x) == PLUS
8058 	       && (mode != TImode || !TARGET_VSX))
8059 	return gen_rtx_PLUS (Pmode,
8060 			     force_reg (Pmode, XEXP (x, 0)),
8061 			     force_reg (Pmode, XEXP (x, 1)));
8063 	return force_reg (Pmode, x);
/* Thread-local symbols are handled by the dedicated TLS legitimizer.  */
8065   if (SYMBOL_REF_P (x))
8067       enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8069 	return rs6000_legitimize_tls_address (x, model);
8081   /* As in legitimate_offset_address_p we do not assume
8082      worst-case.  The mode here is just a hint as to the registers
8083      used.  A TImode is usually in gprs, but may actually be in
8084      fprs.  Leave worst-case scenario for reload to handle via
8085      insn constraints.  PTImode is only GPRs.  */
/* reg + out-of-range constant: split the constant into a high part
   (added into a new register) and a signed 16-bit low part.  */
8092   if (GET_CODE (x) == PLUS
8093       && REG_P (XEXP (x, 0))
8094       && CONST_INT_P (XEXP (x, 1))
8095       && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8096 	  >= 0x10000 - extra))
8098       HOST_WIDE_INT high_int, low_int;
/* Sign-extend the low 16 bits: (v ^ 0x8000) - 0x8000.  */
8100       low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8101       if (low_int >= 0x8000 - extra)
8103       high_int = INTVAL (XEXP (x, 1)) - low_int;
8104       sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8105 					 GEN_INT (high_int)), 0);
8106       return plus_constant (Pmode, sum, low_int);
/* reg + non-constant: force the second operand into a register so the
   result is an indexed (reg+reg) address, when indexing is allowed.  */
8108   else if (GET_CODE (x) == PLUS
8109 	   && REG_P (XEXP (x, 0))
8110 	   && !CONST_INT_P (XEXP (x, 1))
8111 	   && GET_MODE_NUNITS (mode) == 1
8112 	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8113 	       || (/* ??? Assume floating point reg based on mode?  */
8114 		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8115 	   && !avoiding_indexed_address_p (mode))
8117       return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8118 			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
/* ELF/Darwin small-data style addressing: materialize the high part with
   elf_high/macho_high and return a LO_SUM for the low part.  */
8120   else if ((TARGET_ELF
8122 	    || !MACHO_DYNAMIC_NO_PIC_P
8129 	   && !CONST_WIDE_INT_P (x)
8130 	   && !CONST_DOUBLE_P (x)
8132 	   && GET_MODE_NUNITS (mode) == 1
8133 	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8134 	       || (/* ??? Assume floating point reg based on mode?  */
8135 		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8137       rtx reg = gen_reg_rtx (Pmode);
8139 	emit_insn (gen_elf_high (reg, x));
8141 	emit_insn (gen_macho_high (reg, x));
8142       return gen_rtx_LO_SUM (Pmode, reg, x);
/* Constant-pool entries that live in the TOC are addressed via a TOC
   reference.  */
8146 	   && constant_pool_expr_p (x)
8147 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8148     return create_TOC_reference (x, NULL_RTX);
8153 /* Debug version of rs6000_legitimize_address. */
/* Debug wrapper: call rs6000_legitimize_address, then dump the mode, the
   old/new RTL codes and any insns that were emitted to stderr.  Used when
   -mdebug=addr style debugging is enabled.  */
8155 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8161   ret = rs6000_legitimize_address (x, oldx, mode);
/* Collect the insns the legitimizer emitted so they can be printed.  */
8162   insns = get_insns ();
8168 	       "\nrs6000_legitimize_address: mode %s, old code %s, "
8169 	       "new code %s, modified\n",
8170 	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8171 	       GET_RTX_NAME (GET_CODE (ret)));
8173 	fprintf (stderr, "Original address:\n");
8176 	  fprintf (stderr, "oldx:\n");
8179 	fprintf (stderr, "New address:\n");
8184 	  fprintf (stderr, "Insns added:\n");
/* Cap the dump at 20 insns to keep the output bounded.  */
8185 	  debug_rtx_list (insns, 20);
/* No change was made: report that and fall through to return.  */
8191 	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8192 	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8203 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8204 We need to emit DTP-relative relocations. */
8206 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
/* Emit a DTP-relative relocation for X of SIZE bytes into FILE (used by
   dwarf2out via TARGET_ASM_OUTPUT_DWARF_DTPREL; see comment above).  */
8208 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8213       fputs ("\t.long\t", file);
8216       fputs (DOUBLE_INT_ASM_OP, file);
8221   output_addr_const (file, x);
/* ELF biases @dtprel by 0x8000; XCOFF instead selects a TLS model
   suffix based on the symbol's TLS model.  */
8223     fputs ("@dtprel+0x8000", file);
8224   else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8226       switch (SYMBOL_REF_TLS_MODEL (x))
8230 	case TLS_MODEL_LOCAL_EXEC:
8231 	  fputs ("@le", file);
8233 	case TLS_MODEL_INITIAL_EXEC:
8234 	  fputs ("@ie", file);
8236 	case TLS_MODEL_GLOBAL_DYNAMIC:
8237 	case TLS_MODEL_LOCAL_DYNAMIC:
8246 /* Return true if X is a symbol that refers to real (rather than emulated)
/* Predicate: X is a SYMBOL_REF with a "real" (non-emulated) TLS model,
   i.e. its model is at least TLS_MODEL_REAL.  */
8250 rs6000_real_tls_symbol_ref_p (rtx x)
8252   return (SYMBOL_REF_P (x)
8253 	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8256 /* In the name of slightly smaller debug output, and to cater to
8257 general assembler lossage, recognize various UNSPEC sequences
8258 and turn them back into a direct symbol reference. */
/* Undo address legitimization: strip the UNSPEC wrappers this backend
   uses (TOC references, fusion, Mach-O PIC offsets) and recover the
   underlying symbol, mainly for smaller/cleaner debug output.  */
8261 rs6000_delegitimize_address (rtx orig_x)
/* A fusion UNSPEC simply wraps the real address in operand 0.  */
8265   if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8266     orig_x = XVECEXP (orig_x, 0, 0);
8268   orig_x = delegitimize_mem_from_attrs (orig_x);
8275   if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
/* Peel off a constant offset so it can be re-applied after the
   UNSPEC_TOCREL wrapper is removed.  */
8279   if (GET_CODE (y) == PLUS
8280       && GET_MODE (y) == Pmode
8281       && CONST_INT_P (XEXP (y, 1)))
8283       offset = XEXP (y, 1);
8287   if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8289       y = XVECEXP (y, 0, 0);
8292       /* Do not associate thread-local symbols with the original
8293 	 constant pool symbol.  */
8296 	  && CONSTANT_POOL_ADDRESS_P (y)
8297 	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8301       if (offset != NULL_RTX)
8302 	y = gen_rtx_PLUS (Pmode, y, offset);
8303       if (!MEM_P (orig_x))
8306 	return replace_equiv_address_nv (orig_x, y);
/* Mach-O PIC: a LO_SUM of a CONST UNSPEC_MACHOPIC_OFFSET wraps the
   symbol in operand 0.  */
8310       && GET_CODE (orig_x) == LO_SUM
8311       && GET_CODE (XEXP (orig_x, 1)) == CONST)
8313       y = XEXP (XEXP (orig_x, 1), 0);
8314       if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8315 	return XVECEXP (y, 0, 0);
8321 /* Return true if X shouldn't be emitted into the debug info.
8322 The linker doesn't like .toc section references from
8323 .debug_* sections, so reject .toc section symbols. */
/* Reject X for debug info when it is an UNSPEC or a .toc-section
   constant-pool symbol (see the block comment above: the linker dislikes
   .toc references from .debug_* sections).  */
8326 rs6000_const_not_ok_for_debug_p (rtx x)
8328   if (GET_CODE (x) == UNSPEC)
8330   if (SYMBOL_REF_P (x)
8331       && CONSTANT_POOL_ADDRESS_P (x))
8333       rtx c = get_pool_constant (x);
8334       machine_mode cmode = get_pool_mode (x);
/* Special pool entries are the ones emitted into the TOC.  */
8335       if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8342 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* TARGET_LEGITIMATE_COMBINED_INSN: veto combine results whose insn code
   is one of the count-register doloop branches, for the reasons listed
   in the comment below.  */
8345 rs6000_legitimate_combined_insn (rtx_insn *insn)
8347   int icode = INSN_CODE (insn);
8349   /* Reject creating doloop insns.  Combine should not be allowed
8350      to create these for a number of reasons:
8351      1) In a nested loop, if combine creates one of these in an
8352      outer loop and the register allocator happens to allocate ctr
8353      to the outer loop insn, then the inner loop can't use ctr.
8354      Inner loops ought to be more highly optimized.
8355      2) Combine often wants to create one of these from what was
8356      originally a three insn sequence, first combining the three
8357      insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
8358      allocated ctr, the splitter takes use back to the three insn
8359      sequence.  It's better to stop combine at the two insn
8361      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8362      insns, the register allocator sometimes uses floating point
8363      or vector registers for the pseudo.  Since ctrsi/ctrdi is a
8364      jump insn and output reloads are not implemented for jumps,
8365      the ctrsi/ctrdi splitters need to handle all possible cases.
8366      That's a pain, and it gets to be seriously difficult when a
8367      splitter that runs after reload needs memory to transfer from
8368      a gpr to fpr.  See PR70098 and PR71763 which are not fixed
8369      for the difficult case.  It's better to not create problems
8370      in the first place.  */
8371   if (icode != CODE_FOR_nothing
8372       && (icode == CODE_FOR_bdz_si
8373 	  || icode == CODE_FOR_bdz_di
8374 	  || icode == CODE_FOR_bdnz_si
8375 	  || icode == CODE_FOR_bdnz_di
8376 	  || icode == CODE_FOR_bdztf_si
8377 	  || icode == CODE_FOR_bdztf_di
8378 	  || icode == CODE_FOR_bdnztf_si
8379 	  || icode == CODE_FOR_bdnztf_di))
8385 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for __tls_get_addr; GTY-marked so it survives GC.  */
8387 static GTY(()) rtx rs6000_tls_symbol;
/* Return (lazily creating) the SYMBOL_REF for the __tls_get_addr
   libfunc.  */
8389 rs6000_tls_get_addr (void)
8391   if (!rs6000_tls_symbol)
8392     rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8394   return rs6000_tls_symbol;
8397 /* Construct the SYMBOL_REF for TLS GOT references. */
/* Cached SYMBOL_REF for _GLOBAL_OFFSET_TABLE_; GTY-marked for GC.  */
8399 static GTY(()) rtx rs6000_got_symbol;
/* Return (lazily creating) the SYMBOL_REF used for GOT-based TLS
   references, flagged both LOCAL and EXTERNAL.  */
8401 rs6000_got_sym (void)
8403   if (!rs6000_got_symbol)
8405       rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8406       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8407       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8410   return rs6000_got_symbol;
8413 /* AIX Thread-Local Address support. */
8416 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8418 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8422 name = XSTR (addr, 0);
8423 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8424 or the symbol will be in TLS private data section. */
8425 if (name[strlen (name) - 1] != ']'
8426 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8427 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8429 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8430 strcpy (tlsname, name);
8432 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8433 tlsaddr = copy_rtx (addr);
8434 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8439 /* Place addr into TOC constant pool. */
8440 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8442 /* Output the TOC entry and create the MEM referencing the value. */
8443 if (constant_pool_expr_p (XEXP (sym, 0))
8444 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8446 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8447 mem = gen_const_mem (Pmode, tocref);
8448 set_mem_alias_set (mem, get_TOC_alias_set ());
8453 /* Use global-dynamic for local-dynamic. */
8454 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8455 || model == TLS_MODEL_LOCAL_DYNAMIC)
8457 /* Create new TOC reference for @m symbol. */
8458 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8459 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8460 strcpy (tlsname, "*LCM");
8461 strcat (tlsname, name + 3);
8462 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8463 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8464 tocref = create_TOC_reference (modaddr, NULL_RTX);
8465 rtx modmem = gen_const_mem (Pmode, tocref);
8466 set_mem_alias_set (modmem, get_TOC_alias_set ());
8468 rtx modreg = gen_reg_rtx (Pmode);
8469 emit_insn (gen_rtx_SET (modreg, modmem));
8471 tmpreg = gen_reg_rtx (Pmode);
8472 emit_insn (gen_rtx_SET (tmpreg, mem));
8474 dest = gen_reg_rtx (Pmode);
8476 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8478 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8481 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8482 else if (TARGET_32BIT)
8484 tlsreg = gen_reg_rtx (SImode);
8485 emit_insn (gen_tls_get_tpointer (tlsreg));
8488 tlsreg = gen_rtx_REG (DImode, 13);
8490 /* Load the TOC value into temporary register. */
8491 tmpreg = gen_reg_rtx (Pmode);
8492 emit_insn (gen_rtx_SET (tmpreg, mem));
8493 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8494 gen_rtx_MINUS (Pmode, addr, tlsreg));
8496 /* Add TOC symbol value to TLS pointer. */
8497 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8502 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8503 __tls_get_addr call. */
/* Emit the argument-setup assembly for a !TARGET_TLS_MARKERS
   __tls_get_addr call (see comment above).  OPERANDS is the call's
   operand array; operands[2] is the UNSPEC stashed by global_tlsarg.  */
8506 rs6000_output_tlsargs (rtx *operands)
8508   /* Set up operands for output_asm_insn, without modifying OPERANDS.  */
8511   /* The set dest of the call, ie. r3, which is also the first arg reg.  */
8512   op[0] = operands[0];
8513   /* The TLS symbol from global_tlsarg stashed as CALL operand 2.  */
8514   op[1] = XVECEXP (operands[2], 0, 0);
8515   if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8517       /* The GOT register.  */
8518       op[2] = XVECEXP (operands[2], 0, 1);
/* Medium/large code models need the two-insn @ha/@l sequence.  */
8519       if (TARGET_CMODEL != CMODEL_SMALL)
8520 	output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8521 			 "addi %0,%0,%1@got@tlsgd@l", op);
8523 	output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
8525   else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8527       if (TARGET_CMODEL != CMODEL_SMALL)
8528 	output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8529 			 "addi %0,%0,%&@got@tlsld@l", op);
8531 	output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
8537 /* Passes the tls arg value for global dynamic and local dynamic
8538 emit_library_call_value in rs6000_legitimize_tls_address to
8539 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8540 marker relocs put on __tls_get_addr calls. */
8541 static rtx global_tlsarg;
8543 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8544 this (thread-local) address. */
8547 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8552 return rs6000_legitimize_tls_address_aix (addr, model);
8554 dest = gen_reg_rtx (Pmode);
8555 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8561 tlsreg = gen_rtx_REG (Pmode, 13);
8562 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8566 tlsreg = gen_rtx_REG (Pmode, 2);
8567 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8571 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8575 tmp = gen_reg_rtx (Pmode);
8578 tlsreg = gen_rtx_REG (Pmode, 13);
8579 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8583 tlsreg = gen_rtx_REG (Pmode, 2);
8584 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8588 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8590 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8595 rtx got, tga, tmp1, tmp2;
8597 /* We currently use relocations like @got@tlsgd for tls, which
8598 means the linker will handle allocation of tls entries, placing
8599 them in the .got section. So use a pointer to the .got section,
8600 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8601 or to secondary GOT sections used by 32-bit -fPIC. */
8603 got = gen_rtx_REG (Pmode, 2);
8607 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8610 rtx gsym = rs6000_got_sym ();
8611 got = gen_reg_rtx (Pmode);
8613 rs6000_emit_move (got, gsym, Pmode);
8618 tmp1 = gen_reg_rtx (Pmode);
8619 tmp2 = gen_reg_rtx (Pmode);
8620 mem = gen_const_mem (Pmode, tmp1);
8621 lab = gen_label_rtx ();
8622 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8623 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8624 if (TARGET_LINK_STACK)
8625 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8626 emit_move_insn (tmp2, mem);
8627 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8628 set_unique_reg_note (last, REG_EQUAL, gsym);
8633 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8635 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8637 tga = rs6000_tls_get_addr ();
8638 global_tlsarg = arg;
8639 if (TARGET_TLS_MARKERS)
8641 rtx argreg = gen_rtx_REG (Pmode, 3);
8642 emit_insn (gen_rtx_SET (argreg, arg));
8643 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8647 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8648 global_tlsarg = NULL_RTX;
8650 /* Make a note so that the result of this call can be CSEd. */
8651 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8652 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8653 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8655 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8657 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8658 tga = rs6000_tls_get_addr ();
8659 tmp1 = gen_reg_rtx (Pmode);
8660 global_tlsarg = arg;
8661 if (TARGET_TLS_MARKERS)
8663 rtx argreg = gen_rtx_REG (Pmode, 3);
8664 emit_insn (gen_rtx_SET (argreg, arg));
8665 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8669 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8670 global_tlsarg = NULL_RTX;
8672 /* Make a note so that the result of this call can be CSEd. */
8673 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8674 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8675 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8677 if (rs6000_tls_size == 16)
8680 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8682 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8684 else if (rs6000_tls_size == 32)
8686 tmp2 = gen_reg_rtx (Pmode);
8688 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8690 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8693 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8695 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8699 tmp2 = gen_reg_rtx (Pmode);
8701 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8703 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8705 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8711 /* IE, or 64-bit offset LE. */
8712 tmp2 = gen_reg_rtx (Pmode);
8714 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8716 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8719 insn = gen_tls_tls_64 (dest, tmp2, addr);
8721 insn = gen_tls_tls_32 (dest, tmp2, addr);
8729 /* Only create the global variable for the stack protect guard if we are using
8730 the global flavor of that guard. */
/* TARGET_STACK_PROTECT_GUARD: return the default global guard variable
   only when -mstack-protector-guard=global is in effect (see comment
   above).  */
8732 rs6000_init_stack_protect_guard (void)
8734   if (rs6000_stack_protector_guard == SSP_GLOBAL)
8735     return default_stack_protect_guard ();
8740 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* TARGET_CANNOT_FORCE_CONST_MEM: reject HIGH-of-UNSPEC, TLS symbol plus
   constant sums, and (for ELF) anything referencing TLS, from being
   placed in the constant pool.  */
8743 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8745   if (GET_CODE (x) == HIGH
8746       && GET_CODE (XEXP (x, 0)) == UNSPEC)
8749   /* A TLS symbol in the TOC cannot contain a sum.  */
8750   if (GET_CODE (x) == CONST
8751       && GET_CODE (XEXP (x, 0)) == PLUS
8752       && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8753       && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8756   /* Do not place an ELF TLS symbol in the constant pool.  */
8757   return TARGET_ELF && tls_referenced_p (x);
8760 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8761 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8762 can be addressed relative to the toc pointer. */
/* True iff SYM should be addressed relative to the TOC pointer: either a
   TOC-resident constant-pool entry, or (cmodel=medium) a local symbol of
   a mode small enough for TOC-pointer alignment (see comment above).  */
8765 use_toc_relative_ref (rtx sym, machine_mode mode)
8767   return ((constant_pool_expr_p (sym)
8768 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8769 					       get_pool_mode (sym)))
8770 	  || (TARGET_CMODEL == CMODEL_MEDIUM
8771 	      && SYMBOL_REF_LOCAL_P (sym)
8772 	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT))
8775 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8776 that is a valid memory address for an instruction.
8777 The MODE argument is the machine mode for the MEM expression
8778 that wants to use this address.
8780 On the RS/6000, there are four valid address: a SYMBOL_REF that
8781 refers to a constant pool entry of an address (or the sum of it
8782 plus a constant), a short (16-bit signed) constant plus a register,
8783 the sum of two registers, or a register indirect, possibly with an
8784 auto-increment. For DFmode, DDmode and DImode with a constant plus
8785 register, we must ensure that both words are addressable or PowerPC64
8786 with offset word aligned.
8788 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8789 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8790 because adjacent memory cells are accessed by adding word-sized offsets
8791 during assembly output. */
/* TARGET_LEGITIMATE_ADDRESS_P (see the block comment above): decide
   whether X is a valid address for MODE; REG_OK_STRICT selects strict
   (post-reload) register checking.  */
8793 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8795   bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8796   bool quad_offset_p = mode_supports_dq_form (mode);
8798   /* If this is an unaligned stvx/ldvx type address, discard the outer AND.  */
8799   if (VECTOR_MEM_ALTIVEC_P (mode)
8800       && GET_CODE (x) == AND
8801       && CONST_INT_P (XEXP (x, 1))
8802       && INTVAL (XEXP (x, 1)) == -16)
/* ELF TLS symbols are never directly addressable.  */
8805   if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8807   if (legitimate_indirect_address_p (x, reg_ok_strict))
8810       && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8811       && mode_supports_pre_incdec_p (mode)
8812       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8814   /* Handle restricted vector d-form offsets in ISA 3.0.  */
8817       if (quad_address_p (x, mode, reg_ok_strict))
8820   else if (virtual_stack_registers_memory_p (x))
8823   else if (reg_offset_p)
8825       if (legitimate_small_data_p (mode, x))
8827       if (legitimate_constant_pool_address_p (x, mode,
8828 					     reg_ok_strict || lra_in_progress))
8832   /* For TImode, if we have TImode in VSX registers, only allow register
8833      indirect addresses.  This will allow the values to go in either GPRs
8834      or VSX registers without reloading.  The vector types would tend to
8835      go into VSX registers, so we allow REG+REG, while TImode seems
8836      somewhat split, in that some uses are GPR based, and some VSX based.  */
8837   /* FIXME: We could loosen this by changing the following to
8838        if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8839      but currently we cannot allow REG+REG addressing for TImode.  See
8840      PR72827 for complete details on how this ends up hoodwinking DSE.  */
8841   if (mode == TImode && TARGET_VSX)
8843   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
8846       && GET_CODE (x) == PLUS
8847       && REG_P (XEXP (x, 0))
8848       && (XEXP (x, 0) == virtual_stack_vars_rtx
8849 	  || XEXP (x, 0) == arg_pointer_rtx)
8850       && CONST_INT_P (XEXP (x, 1)))
8852   if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
/* Indexed (reg+reg) addressing, subject to mode/register-class limits.  */
8854   if (!FLOAT128_2REG_P (mode)
8855       && (TARGET_HARD_FLOAT
8857 	  || (mode != DFmode && mode != DDmode))
8858       && (TARGET_POWERPC64 || mode != DImode)
8859       && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8861       && !avoiding_indexed_address_p (mode)
8862       && legitimate_indexed_address_p (x, reg_ok_strict))
/* PRE_MODIFY: base must be indirect-legitimate and the new address must
   itself be a valid offset or indexed form using the same base.  */
8864   if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8865       && mode_supports_pre_modify_p (mode)
8866       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8867       && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8868 					      reg_ok_strict, false)
8869 	  || (!avoiding_indexed_address_p (mode)
8870 	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8871       && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8873   if (reg_offset_p && !quad_offset_p
8874       && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8879 /* Debug version of rs6000_legitimate_address_p. */
/* Debug wrapper for rs6000_legitimate_address_p: compute the result,
   print it with the mode, strictness, reload phase and RTL code, then
   return it.  */
8881 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8884   bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8886 	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8887 	   "strict = %d, reload = %s, code = %s\n",
8888 	   ret ? "true" : "false",
8889 	   GET_MODE_NAME (mode),
8891 	   (reload_completed ? "after" : "before"),
8892 	   GET_RTX_NAME (GET_CODE (x)));
8898 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
/* TARGET_MODE_DEPENDENT_ADDRESS_P hook: forward to the function stored
   in rs6000_mode_dependent_address_ptr (real vs debug variant).  */
8901 rs6000_mode_dependent_address_p (const_rtx addr,
8902 				 addr_space_t as ATTRIBUTE_UNUSED)
8904   return rs6000_mode_dependent_address_ptr (addr);
8907 /* Go to LABEL if ADDR (a legitimate address expression)
8908 has an effect that depends on the machine mode it is used for.
8910 On the RS/6000 this is true of all integral offsets (since AltiVec
8911 and VSX modes don't allow them) or is a pre-increment or decrement.
8913 ??? Except that due to conceptual problems in offsettable_address_p
8914 we can't really report the problems of integral offsets. So leave
8915 this assuming that the adjustable offset must be valid for the
8916 sub-words of a TFmode operand, which is what we had before. */
/* Return true if ADDR's validity depends on the mode it is used with
   (see the block comment above), dispatching on the address's RTL code.  */
8919 rs6000_mode_dependent_address (const_rtx addr)
8921   switch (GET_CODE (addr))
8924       /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8925 	 is considered a legitimate address before reload, so there
8926 	 are no offset restrictions in that case.  Note that this
8927 	 condition is safe in strict mode because any address involving
8928 	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8929 	 been rejected as illegitimate.  */
8930       if (XEXP (addr, 0) != virtual_stack_vars_rtx
8931 	  && XEXP (addr, 0) != arg_pointer_rtx
8932 	  && CONST_INT_P (XEXP (addr, 1)))
8934 	  unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
/* Offset must leave room for the remaining sub-word accesses:
   8 bytes on 64-bit, 12 on 32-bit.  */
8935 	  return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8940       /* Anything in the constant pool is sufficiently aligned that
8941 	 all bytes have the same high part address.  */
8942       return !legitimate_constant_pool_address_p (addr, QImode, false);
8944     /* Auto-increment cases are now treated generically in recog.c.  */
8946       return TARGET_UPDATE;
8948     /* AND is only allowed in Altivec loads.  */
8959 /* Debug version of rs6000_mode_dependent_address. */
/* Debug wrapper for rs6000_mode_dependent_address: print and return the
   result.  */
8961 rs6000_debug_mode_dependent_address (const_rtx addr)
8963   bool ret = rs6000_mode_dependent_address (addr);
8965   fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8966 	   ret ? "true" : "false");
8972 /* Implement FIND_BASE_TERM. */
/* Implement FIND_BASE_TERM: strip CONST/PLUS wrappers and, for the
   Mach-O PIC-offset UNSPEC, return the wrapped symbol as the base term
   for alias analysis.  */
8975 rs6000_find_base_term (rtx op)
8980   if (GET_CODE (base) == CONST)
8981     base = XEXP (base, 0);
8982   if (GET_CODE (base) == PLUS)
8983     base = XEXP (base, 0);
8984   if (GET_CODE (base) == UNSPEC)
8985     switch (XINT (base, 1))
8988       case UNSPEC_MACHOPIC_OFFSET:
8989 	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
8990 	   for aliasing purposes.  */
8991 	return XVECEXP (base, 0, 0);
8997 /* More elaborate version of recog's offsettable_memref_p predicate
8998 that works around the ??? note of rs6000_mode_dependent_address.
8999 In particular it accepts
9001 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9003 in 32-bit mode, that the recog predicate rejects. */
/* Mode-aware replacement for offsettable_memref_p (see comment above):
   OP is the MEM, REG_MODE the mode of the registers that will hold it,
   STRICT selects strict register checking.  */
9006 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9013   /* First mimic offsettable_memref_p.  */
9014   if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9017   /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9018      the latter predicate knows nothing about the mode of the memory
9019      reference and, therefore, assumes that it is the largest supported
9020      mode (TFmode).  As a consequence, legitimate offsettable memory
9021      references are rejected.  rs6000_legitimate_offset_address_p contains
9022      the correct logic for the PLUS case of rs6000_mode_dependent_address,
9023      at least with a little bit of help here given that we know the
9024      actual registers used.  */
9025   worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9026 		|| GET_MODE_SIZE (reg_mode) == 4);
9027   return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9028 					     strict, worst_case);
9031 /* Determine the reassociation width to be used in reassociate_bb.
9032 This takes into account how many parallel operations we
9033 can actually do of a given type, and also the latency.
9037 vect add/sub/mul 2/cycle
9038 fp add/sub/mul 2/cycle
/* TARGET_SCHED_REASSOCIATION_WIDTH (see comment above): pick a width
   for reassociate_bb based on the tuned processor and the mode class.  */
9043 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9046   switch (rs6000_tune)
9048     case PROCESSOR_POWER8:
9049     case PROCESSOR_POWER9:
9050     case PROCESSOR_FUTURE:
/* Order matters: DFP first, then vector, integral, float.  */
9051       if (DECIMAL_FLOAT_MODE_P (mode))
9053       if (VECTOR_MODE_P (mode))
9055       if (INTEGRAL_MODE_P (mode))
9057       if (FLOAT_MODE_P (mode))
9066 /* Change register usage conditional on target flags. */
/* TARGET_CONDITIONAL_REGISTER_USAGE: adjust fixed/call-used register
   sets according to the selected ABI and ISA flags.  */
9068 rs6000_conditional_register_usage (void)
9072   if (TARGET_DEBUG_TARGET)
9073     fprintf (stderr, "rs6000_conditional_register_usage called\n");
9075   /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
9077     fixed_regs[13] = call_used_regs[13]
9078       = call_really_used_regs[13] = 1;
9080   /* Conditionally disable FPRs.  */
9081   if (TARGET_SOFT_FLOAT)
9082     for (i = 32; i < 64; i++)
9083       fixed_regs[i] = call_used_regs[i]
9084 	= call_really_used_regs[i] = 1;
9086   /* The TOC register is not killed across calls in a way that is
9087      visible to the compiler.  */
9088   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9089     call_really_used_regs[2] = 0;
/* SVR4 PIC: fix (and for -fpic, also call-clobber) the PIC register.  */
9091   if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9092     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9094   if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9095     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9096       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9097       = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9099   if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9100     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9101       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9102       = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9104   if (TARGET_TOC && TARGET_MINIMAL_TOC)
9105     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9106       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
/* Without AltiVec/VSX the vector registers are unusable.  */
9108   if (!TARGET_ALTIVEC && !TARGET_VSX)
9110       for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9111 	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9112       call_really_used_regs[VRSAVE_REGNO] = 1;
9115   if (TARGET_ALTIVEC || TARGET_VSX)
9116     global_regs[VSCR_REGNO] = 1;
9118   if (TARGET_ALTIVEC_ABI)
/* VR0..VR19 are call-clobbered under the AltiVec ABI.  */
9120       for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9121 	call_used_regs[i] = call_really_used_regs[i] = 1;
9123       /* AIX reserves VR20:31 in non-extended ABI mode.  */
9125 	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9126 	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9131 /* Output insns to set DEST equal to the constant SOURCE as a series of
9132 lis, ori and shl instructions and return TRUE. */
/* Emit insns setting DEST to the CONST_INT SOURCE (see comment above).
   Handles SImode directly (lis+ori) and splits DImode into two SImode
   halves on 32-bit, deferring to rs6000_emit_set_long_const on 64-bit.  */
9135 rs6000_emit_set_const (rtx dest, rtx source)
9137   machine_mode mode = GET_MODE (dest);
9142   gcc_checking_assert (CONST_INT_P (source));
9143   c = INTVAL (source);
/* Small constants fit in a single move.  */
9148       emit_insn (gen_rtx_SET (dest, source));
/* Otherwise: lis of the high 16 bits, then ior of the low 16.  */
9152       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9154       emit_insn (gen_rtx_SET (copy_rtx (temp),
9155 			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9156       emit_insn (gen_rtx_SET (dest,
9157 			      gen_rtx_IOR (SImode, copy_rtx (temp),
9158 					   GEN_INT (c & 0xffff))));
9162       if (!TARGET_POWERPC64)
/* 32-bit DImode: set each word separately, endian-aware subwords.  */
9166 	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9168 	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9170 	  emit_move_insn (hi, GEN_INT (c >> 32));
/* Sign-extend the low 32 bits for the second move.  */
9171 	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9172 	  emit_move_insn (lo, GEN_INT (c));
9175 	rs6000_emit_set_long_const (dest, c);
/* Attach a REG_EQUAL note so later passes know the constant value.  */
9182   insn = get_last_insn ();
9183   set = single_set (insn);
9184   if (! CONSTANT_P (SET_SRC (set)))
9185     set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9190 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9191 Output insns to set DEST equal to the constant C as a series of
9192 lis, ori and shl instructions. */
/* PowerPC64 DImode constant materialization (see comment above).  The
   constant is split into four 16-bit pieces ud1 (lowest) .. ud4
   (highest); each case below builds the shortest lis/ori/sldi-style
   sequence for how many pieces are significant.  */
9195 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9198   HOST_WIDE_INT ud1, ud2, ud3, ud4;
/* Fits in a sign-extended 16-bit value: one move.  */
9208   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9209       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9210     emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
/* Fits in a sign-extended 32-bit value: lis + optional ori.  */
9212   else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9213 	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9215       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9217       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9218 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9220 	emit_move_insn (dest,
9221 			gen_rtx_IOR (DImode, copy_rtx (temp),
/* Upper 32 bits zero (but ud2's sign bit set): build the low word and
   zero-extend it.  */
9224   else if (ud3 == 0 && ud4 == 0)
9226       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9228       gcc_assert (ud2 & 0x8000);
9229       emit_move_insn (copy_rtx (temp),
9230 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9232 	emit_move_insn (copy_rtx (temp),
9233 			gen_rtx_IOR (DImode, copy_rtx (temp),
9235       emit_move_insn (dest,
9236 		      gen_rtx_ZERO_EXTEND (DImode,
9237 					   gen_lowpart (SImode,
/* 48-bit (sign-extended) constant: build ud3:ud2, shift left 16, or in
   ud1.  */
9240   else if ((ud4 == 0xffff && (ud3 & 0x8000))
9241 	   || (ud4 == 0 && ! (ud3 & 0x8000)))
9243       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9245       emit_move_insn (copy_rtx (temp),
9246 		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9248 	emit_move_insn (copy_rtx (temp),
9249 			gen_rtx_IOR (DImode, copy_rtx (temp),
9251       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9252 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9255 	emit_move_insn (dest,
9256 			gen_rtx_IOR (DImode, copy_rtx (temp),
/* Full 64-bit case: build ud4:ud3, shift left 32, or in ud2 and ud1.  */
9261       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9263       emit_move_insn (copy_rtx (temp),
9264 		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9266 	emit_move_insn (copy_rtx (temp),
9267 			gen_rtx_IOR (DImode, copy_rtx (temp),
9270       emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9271 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9274 	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9275 			gen_rtx_IOR (DImode, copy_rtx (temp),
9276 				     GEN_INT (ud2 << 16)));
9278 	emit_move_insn (dest,
9279 			gen_rtx_IOR (DImode, copy_rtx (temp),
9284 /* Helper for the following.  Get rid of [r+r] memory refs
9285    in cases where it won't work (TImode, TFmode, TDmode, PTImode).
     Rewrites each MEM operand in place so its address is a single base
     register (via copy_addr_to_reg), unless the address is already a
     plain REG or a legitimate constant-pool address.  */
9288 rs6000_eliminate_indexed_memrefs (rtx operands[2])
/* Destination operand: force a non-REG, non-constant-pool address into
   a fresh base register.  */
9290   if (MEM_P (operands[0])
9291       && !REG_P (XEXP (operands[0], 0))
9292       && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9293 					      GET_MODE (operands[0]), false))
9295       = replace_equiv_address (operands[0],
9296 			       copy_addr_to_reg (XEXP (operands[0], 0)));
/* Same treatment for the source operand.  */
9298   if (MEM_P (operands[1])
9299       && !REG_P (XEXP (operands[1], 0))
9300       && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9301 					      GET_MODE (operands[1]), false))
9303       = replace_equiv_address (operands[1],
9304 			       copy_addr_to_reg (XEXP (operands[1], 0)));
9307 /* Generate a vector of constants to permute MODE for a little-endian
9308    storage operation by swapping the two halves of a vector.
     The returned rtvec is the selector {n/2..n-1, 0..n/2-1} where n is
     the number of subparts.  NOTE(review): the declarations of v, i and
     subparts are elided in this extract.  */
9310 rs6000_const_vec (machine_mode mode)
9338   v = rtvec_alloc (subparts);
/* First half of the selector indexes the second half of the source.  */
9340   for (i = 0; i < subparts / 2; ++i)
9341     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
/* Second half of the selector indexes the first half of the source.  */
9342   for (i = subparts / 2; i < subparts; ++i)
9343     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9348 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
     store: emit a single SET of DEST from a permuted SOURCE (half-swap)
     in MODE.  */
9351 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9353   /* Scalar permutations are easier to express in integer modes rather than
9354      floating-point modes, so cast them here.  We use V1TImode instead
9355      of TImode to ensure that the values don't go through GPRs.  */
9356   if (FLOAT128_VECTOR_P (mode))
9358       dest = gen_lowpart (V1TImode, dest);
9359       source = gen_lowpart (V1TImode, source);
9363   /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
       element; the half-swap of a 128-bit scalar is a 64-bit rotate.  */
9365   if (mode == TImode || mode == V1TImode)
9366     emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
/* Otherwise express the half-swap as a VEC_SELECT with the selector
   produced by rs6000_const_vec.  */
9370       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9371       emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9375 /* Emit a little-endian load from vector memory location SOURCE to VSX
9376    register DEST in mode MODE.  The load is done with two permuting
9377    insn's that represent an lxvd2x and xxpermdi.  */
9379 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9381   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
       V1TImode) — NOTE(review): the rest of this comment is elided.  */
9383   if (mode == TImode || mode == V1TImode)
9386       dest = gen_lowpart (V2DImode, dest);
9387       source = adjust_address (source, V2DImode, 0);
/* Permute into a scratch, then permute back into DEST; the two permutes
   compose to the identity on the value while matching lxvd2x+xxpermdi.
   After reload, DEST itself doubles as the scratch.  */
9390   rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9391   rs6000_emit_le_vsx_permute (tmp, source, mode);
9392   rs6000_emit_le_vsx_permute (dest, tmp, mode);
9395 /* Emit a little-endian store to vector memory location DEST from VSX
9396    register SOURCE in mode MODE.  The store is done with two permuting
9397    insn's that represent an xxpermdi and an stxvd2x.  */
9399 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9401   /* This should never be called during or after LRA, because it does
9402      not re-permute the source register.  It is intended only for use
       during expand (see the assert below).  */
9404   gcc_assert (!lra_in_progress && !reload_completed);
9406   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
       V1TImode) — NOTE(review): the rest of this comment is elided.  */
9408   if (mode == TImode || mode == V1TImode)
9411       dest = adjust_address (dest, V2DImode, 0);
9412       source = gen_lowpart (V2DImode, source);
/* Permute SOURCE into a scratch (xxpermdi), then permuting-store the
   scratch to memory (stxvd2x).  */
9415   rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9416   rs6000_emit_le_vsx_permute (tmp, source, mode);
9417   rs6000_emit_le_vsx_permute (dest, tmp, mode);
9420 /* Emit a sequence representing a little-endian VSX load or store,
9421    moving data from SOURCE to DEST in mode MODE.  This is done
9422    separately from rs6000_emit_move to ensure it is called only
9423    during expand.  LE VSX loads and stores introduced later are
9424    handled with a split.  The expand-time RTL generation allows
9425    us to optimize away redundant pairs of register-permutes.  */
9427 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
/* Preconditions: little-endian VSX memory mode, pre-P9, not a GPR-GPR
   move, and exactly one of SOURCE/DEST is a MEM.  */
9429   gcc_assert (!BYTES_BIG_ENDIAN
9430 	      && VECTOR_MEM_VSX_P (mode)
9431 	      && !TARGET_P9_VECTOR
9432 	      && !gpr_or_gpr_p (dest, source)
9433 	      && (MEM_P (source) ^ MEM_P (dest)));
/* Load branch: SOURCE is the MEM, DEST must be a (subreg of a) reg.  */
9437       gcc_assert (REG_P (dest) || SUBREG_P (dest));
9438       rs6000_emit_le_vsx_load (dest, source, mode);
/* Store branch: DEST is the MEM; force SOURCE into a register first.  */
9442       if (!REG_P (source))
9443 	source = force_reg (mode, source);
9444       rs6000_emit_le_vsx_store (dest, source, mode);
9448 /* Return whether a SFmode or SImode move can be done without converting one
9449    mode to another.  This arises when we have:
9451 	(SUBREG:SF (REG:SI ...))
9452 	(SUBREG:SI (REG:SF ...))
9454    and one of the values is in a floating point/vector register, where SFmode
9455    scalars are stored in DFmode format.  */
9458 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
/* NOTE(review): the return statements attached to these early tests are
   elided in this extract.  */
9460   if (TARGET_ALLOW_SF_SUBREG)
9463   if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9466   if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9469   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
9470   if (SUBREG_P (dest))
9472       rtx dest_subreg = SUBREG_REG (dest);
9473       rtx src_subreg = SUBREG_REG (src);
/* Valid only when both subregs wrap registers of the same inner mode.  */
9474       return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9481 /* Helper function to change moves with:
9483 	(SUBREG:SF (REG:SI)) and
9484 	(SUBREG:SI (REG:SF))
9486    into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
9487    values are stored as DFmode values in the VSX registers.  We need to convert
9488    the bits before we can use a direct move or operate on the bits in the
9489    vector register as an integer type.
9491    Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).
     NOTE(review): the return statements (presumably true after emitting,
     false otherwise) are elided in this extract — confirm against the
     caller in rs6000_emit_move.  */
9494 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
/* Only act before reload, when the source (but not the dest) is an
   SF/SI punning subreg.  */
9496   if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9497       && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9498       && SUBREG_P (source) && sf_subreg_operand (source, mode))
9500       rtx inner_source = SUBREG_REG (source);
9501       machine_mode inner_mode = GET_MODE (inner_source);
/* SI <- SF: emit the dedicated bit-conversion pattern.  */
9503       if (mode == SImode && inner_mode == SFmode)
9505 	  emit_insn (gen_movsi_from_sf (dest, inner_source));
/* SF <- SI: the symmetric conversion.  */
9509       if (mode == SFmode && inner_mode == SImode)
9511 	  emit_insn (gen_movsf_from_si (dest, inner_source));
9519 /* Emit a move from SOURCE to DEST in mode MODE.
     This is the main move expander for the rs6000 port: it legitimizes
     the operands (TLS references, constants into the constant pool or
     TOC, Darwin PIC, DDmode/SDmode spill fixups for LRA) and finally
     emits a single SET.  NOTE(review): many interior lines (operands[0]
     assignment, switch cases, else-arms, braces) are elided in this
     extract; comments below cover only the visible logic.  */
9521 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9525   operands[1] = source;
/* Optional debug dump of the move, gated on -mdebug=addr.  */
9527   if (TARGET_DEBUG_ADDR)
9530 	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9531 	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9532 	       GET_MODE_NAME (mode),
9535 	       can_create_pseudo_p ());
9537       fprintf (stderr, "source:\n");
9541   /* Check that we get CONST_WIDE_INT only when we should.  */
9542   if (CONST_WIDE_INT_P (operands[1])
9543       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9546 #ifdef HAVE_AS_GNU_ATTRIBUTE
9547   /* If we use a long double type, set the flags in .gnu_attribute that say
9548      what the long double type is.  This is to allow the linker's warning
9549      message for the wrong long double to be useful, even if the function does
9550      not do a call (for example, doing a 128-bit add on power9 if the long
9551      double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
9552      are used if they aren't the default long double type.  */
9553   if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9555       if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9556 	rs6000_passes_float = rs6000_passes_long_double = true;
9558       else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9559 	rs6000_passes_float = rs6000_passes_long_double = true;
9563   /* See if we need to special case SImode/SFmode SUBREG moves.  */
9564   if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9565       && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9568   /* Check if GCC is setting up a block move that will end up using FP
9569      registers as temporaries.  We must make sure this is acceptable.  */
9570   if (MEM_P (operands[0])
9571       && MEM_P (operands[1])
9573       && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9574 	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9575       && ! (rs6000_slow_unaligned_access (SImode,
9576 					  (MEM_ALIGN (operands[0]) > 32
9577 					   ? 32 : MEM_ALIGN (operands[0])))
9578 	    || rs6000_slow_unaligned_access (SImode,
9579 					     (MEM_ALIGN (operands[1]) > 32
9580 					      ? 32 : MEM_ALIGN (operands[1]))))
9581       && ! MEM_VOLATILE_P (operands [0])
9582       && ! MEM_VOLATILE_P (operands [1]))
/* Split the 8-byte mem-to-mem move into two 4-byte SImode moves.  */
9584       emit_move_insn (adjust_address (operands[0], SImode, 0),
9585 		      adjust_address (operands[1], SImode, 0));
9586       emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9587 		      adjust_address (copy_rtx (operands[1]), SImode, 4));
/* A store needs its source in a register (no mem-to-mem moves).  */
9591   if (can_create_pseudo_p () && MEM_P (operands[0])
9592       && !gpc_reg_operand (operands[1], mode))
9593     operands[1] = force_reg (mode, operands[1]);
9595   /* Recognize the case where operand[1] is a reference to thread-local
9596      data and load its address to a register.  */
9597   if (tls_referenced_p (operands[1]))
9599       enum tls_model model;
9600       rtx tmp = operands[1];
/* Peel a (const (plus sym addend)) wrapper before legitimizing.  */
9603       if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9605 	  addend = XEXP (XEXP (tmp, 0), 1);
9606 	  tmp = XEXP (XEXP (tmp, 0), 0);
9609       gcc_assert (SYMBOL_REF_P (tmp));
9610       model = SYMBOL_REF_TLS_MODEL (tmp);
9611       gcc_assert (model != 0);
9613       tmp = rs6000_legitimize_tls_address (tmp, model);
/* Re-apply the addend on top of the legitimized TLS address.  */
9616 	  tmp = gen_rtx_PLUS (mode, tmp, addend);
9617 	  tmp = force_operand (tmp, operands[0]);
9622   /* 128-bit constant floating-point values on Darwin should really be loaded
9623      as two parts.  However, this premature splitting is a problem when DFmode
9624      values can go into Altivec registers.  */
9625   if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9626       && !reg_addr[DFmode].scalar_in_vmx_p)
/* Recursive moves of the low and high DFmode halves.  */
9628       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9629 			simplify_gen_subreg (DFmode, operands[1], mode, 0),
9631       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9632 					     GET_MODE_SIZE (DFmode)),
9633 			simplify_gen_subreg (DFmode, operands[1], mode,
9634 					     GET_MODE_SIZE (DFmode)),
9639   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9640      p1:SD) if p1 is not of floating point class and p0 is spilled as
9641      we can have no analogous movsd_store for this.  */
9642   if (lra_in_progress && mode == DDmode
9643       && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9644       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9645       && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9646       && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9649       int regno = REGNO (SUBREG_REG (operands[1]));
/* Map a pseudo to its assigned (or representative) hard register.  */
9651       if (!HARD_REGISTER_NUM_P (regno))
9653 	  cl = reg_preferred_class (regno);
9654 	  regno = reg_renumber[regno];
9656 	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9658       if (regno >= 0 && ! FP_REGNO_P (regno))
9661 	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9662 	  operands[1] = SUBREG_REG (operands[1]);
/* SDmode store path during LRA: pick movsd_store / movsd_hardfloat
   according to the hard register class of the source.  NOTE(review):
   the leading condition of this if is elided in this extract.  */
9667       && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9668       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9669       && (REG_P (operands[1])
9670 	  || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9672       int regno = reg_or_subregno (operands[1]);
9675       if (!HARD_REGISTER_NUM_P (regno))
9677 	  cl = reg_preferred_class (regno);
9678 	  gcc_assert (cl != NO_REGS);
9679 	  regno = reg_renumber[regno];
9681 	    regno = ira_class_hard_regs[cl][0];
9683       if (FP_REGNO_P (regno))
9685 	  if (GET_MODE (operands[0]) != DDmode)
9686 	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9687 	  emit_insn (gen_movsd_store (operands[0], operands[1]));
9689       else if (INT_REGNO_P (regno))
9690 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9695   /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9696      p:DD)) if p0 is not of floating point class and p1 is spilled as
9697      we can have no analogous movsd_load for this.  */
9698   if (lra_in_progress && mode == DDmode
9699       && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9700       && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9701       && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9702       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9705       int regno = REGNO (SUBREG_REG (operands[0]));
9707       if (!HARD_REGISTER_NUM_P (regno))
9709 	  cl = reg_preferred_class (regno);
9710 	  regno = reg_renumber[regno];
9712 	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9714       if (regno >= 0 && ! FP_REGNO_P (regno))
9717 	  operands[0] = SUBREG_REG (operands[0]);
9718 	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
/* SDmode load path during LRA (mirror of the store path above).
   NOTE(review): the leading condition of this if is elided.  */
9723       && (REG_P (operands[0])
9724 	  || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9725       && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9726       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9728       int regno = reg_or_subregno (operands[0]);
9731       if (!HARD_REGISTER_NUM_P (regno))
9733 	  cl = reg_preferred_class (regno);
9734 	  gcc_assert (cl != NO_REGS);
9735 	  regno = reg_renumber[regno];
9737 	    regno = ira_class_hard_regs[cl][0];
9739       if (FP_REGNO_P (regno))
9741 	  if (GET_MODE (operands[1]) != DDmode)
9742 	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9743 	  emit_insn (gen_movsd_load (operands[0], operands[1]));
9745       else if (INT_REGNO_P (regno))
9746 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9752   /* FIXME:  In the long term, this switch statement should go away
9753      and be replaced by a sequence of tests based on things like
       mode classes.  NOTE(review): the switch head and case labels are
       elided in this extract; the following fragments are individual
       per-mode cases.  */
/* Non-immediate constants go to the constant pool.  */
9759       if (CONSTANT_P (operands[1])
9760 	  && !CONST_INT_P (operands[1]))
9761 	operands[1] = force_const_mem (mode, operands[1]);
/* Two-register IBM long double: no [r+r] addressing.  */
9768       if (FLOAT128_2REG_P (mode))
9769 	rs6000_eliminate_indexed_memrefs (operands);
/* FP constants that are not "easy" go to the constant pool.  */
9776       if (CONSTANT_P (operands[1])
9777 	  && ! easy_fp_constant (operands[1], mode))
9778 	operands[1] = force_const_mem (mode, operands[1]);
/* Same for vector constants that no single instruction can build.  */
9788       if (CONSTANT_P (operands[1])
9789 	  && !easy_vector_constant (operands[1], mode))
9790 	operands[1] = force_const_mem (mode, operands[1]);
9795       /* Use default pattern for address of ELF small data.  */
9798 	  && DEFAULT_ABI == ABI_V4
9799 	  && (SYMBOL_REF_P (operands[1])
9800 	      || GET_CODE (operands[1]) == CONST)
9801 	  && small_data_operand (operands[1], mode))
9803 	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
/* 32-bit SVR4 -fpic: load GOT operands with the movsi_got pattern.  */
9807       if (DEFAULT_ABI == ABI_V4
9808 	  && mode == Pmode && mode == SImode
9809 	  && flag_pic == 1 && got_operand (operands[1], mode))
9811 	  emit_insn (gen_movsi_got (operands[0], operands[1]));
/* ELF / Darwin: build symbolic constants with high/low pairs.  */
9815       if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9819 	  && CONSTANT_P (operands[1])
9820 	  && GET_CODE (operands[1]) != HIGH
9821 	  && !CONST_INT_P (operands[1]))
9823 	  rtx target = (!can_create_pseudo_p ()
9825 			: gen_reg_rtx (mode));
9827 	  /* If this is a function address on -mcall-aixdesc,
9828 	     convert it to the address of the descriptor.  */
9829 	  if (DEFAULT_ABI == ABI_AIX
9830 	      && SYMBOL_REF_P (operands[1])
9831 	      && XSTR (operands[1], 0)[0] == '.')
9833 	      const char *name = XSTR (operands[1], 0);
9835 	      while (*name == '.')
/* Rebuild the SYMBOL_REF without the leading dots, copying all flags.  */
9837 	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9838 	      CONSTANT_POOL_ADDRESS_P (new_ref)
9839 		= CONSTANT_POOL_ADDRESS_P (operands[1]);
9840 	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9841 	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9842 	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9843 	      operands[1] = new_ref;
9846 	  if (DEFAULT_ABI == ABI_DARWIN)
9849 	      if (MACHO_DYNAMIC_NO_PIC_P)
9851 		  /* Take care of any required data indirection.  */
9852 		  operands[1] = rs6000_machopic_legitimize_pic_address (
9853 				  operands[1], mode, operands[0]);
9854 		  if (operands[0] != operands[1])
9855 		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
/* Darwin non-dynamic case: classic hi16/lo16 address build.  */
9859 	      emit_insn (gen_macho_high (target, operands[1]));
9860 	      emit_insn (gen_macho_low (operands[0], target, operands[1]));
/* ELF: @ha / @l address build.  */
9864 	  emit_insn (gen_elf_high (target, operands[1]));
9865 	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
9869       /* If this is a SYMBOL_REF that refers to a constant pool entry,
9870 	 and we have put it in the TOC, we just need to make a TOC-relative
	 reference.  NOTE(review): the condition head is elided.  */
9873 	  && SYMBOL_REF_P (operands[1])
9874 	  && use_toc_relative_ref (operands[1], mode))
9875 	operands[1] = create_TOC_reference (operands[1], operands[0]);
9876       else if (mode == Pmode
9877 	       && CONSTANT_P (operands[1])
9878 	       && GET_CODE (operands[1]) != HIGH
9879 	       && ((REG_P (operands[0])
9880 		    && FP_REGNO_P (REGNO (operands[0])))
9881 		   || !CONST_INT_P (operands[1])
9882 		   || (num_insns_constant (operands[1], mode)
9883 		       > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9884 	       && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9885 	       && (TARGET_CMODEL == CMODEL_SMALL
9886 		   || can_create_pseudo_p ()
9887 		   || (REG_P (operands[0])
9888 		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9892 	  /* Darwin uses a special PIC legitimizer.  */
9893 	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9896 		rs6000_machopic_legitimize_pic_address (operands[1], mode,
9898 	      if (operands[0] != operands[1])
9899 		emit_insn (gen_rtx_SET (operands[0], operands[1]));
9904 	  /* If we are to limit the number of things we put in the TOC and
9905 	     this is a symbol plus a constant we can add in one insn,
9906 	     just put the symbol in the TOC and add the constant.  */
9907 	  if (GET_CODE (operands[1]) == CONST
9908 	      && TARGET_NO_SUM_IN_TOC
9909 	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
9910 	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9911 	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9912 		  || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9913 	      && ! side_effects_p (operands[0]))
9916 		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9917 	      rtx other = XEXP (XEXP (operands[1], 0), 1);
9919 	      sym = force_reg (mode, sym);
9920 	      emit_insn (gen_add3_insn (operands[0], sym, other));
/* Otherwise spill the whole constant to the pool, and if its address
   is TOC-resident, load it through a TOC-relative MEM.  */
9924 	  operands[1] = force_const_mem (mode, operands[1]);
9927 	      && SYMBOL_REF_P (XEXP (operands[1], 0))
9928 	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9930 	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9932 	      operands[1] = gen_const_mem (mode, tocref);
9933 	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
/* TImode without VSX vector memory: no [r+r] addressing.  */
9939       if (!VECTOR_MEM_VSX_P (TImode))
9940 	rs6000_eliminate_indexed_memrefs (operands);
/* PTImode (and similar) case: likewise.  */
9944       rs6000_eliminate_indexed_memrefs (operands);
/* Unhandled mode in the switch — internal error.  */
9948       fatal_insn ("bad move", gen_rtx_SET (dest, source));
9951   /* Above, we may have called force_const_mem which may have returned
9952      an invalid address.  If we can, fix this up; otherwise, reload will
9953      have to deal with it.  */
9954   if (MEM_P (operands[1]))
9955     operands[1] = validize_mem (operands[1]);
/* Finally emit the (possibly rewritten) move as a single SET.  */
9957   emit_insn (gen_rtx_SET (operands[0], operands[1]));
9960 /* Nonzero if we can use a floating-point register to pass this arg.
     True when MODE is a scalar FP (non-vector) mode, the next FP arg
     register is still available, and hard float is enabled.  */
9961 #define USE_FP_FOR_ARG_P(CUM,MODE)		\
9962   (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)	\
9963    && (CUM)->fregno <= FP_ARG_MAX_REG		\
9964    && TARGET_HARD_FLOAT)
9966 /* Nonzero if we can use an AltiVec register to pass this arg.
     NOTE(review): the final continuation line(s) of this macro (use of
     NAMED) are elided in this extract.  */
9967 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)		\
9968   (ALTIVEC_OR_VSX_VECTOR_MODE (MODE)			\
9969    && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG		\
9970    && TARGET_ALTIVEC_ABI				\
9973 /* Walk down the type tree of TYPE counting consecutive base elements.
9974    If *MODEP is VOIDmode, then set it to the first valid floating point
9975    or vector type.  If a non-floating point or vector type is found, or
9976    if a floating point or vector type that doesn't match a non-VOIDmode
9977    *MODEP is found, then return -1, otherwise return the count in the
     type.  NOTE(review): the return type line, case labels for the
     scalar/complex/vector cases, and several returns are elided in this
     extract.  */
9981 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9986   switch (TREE_CODE (type))
/* Scalar floating-point element.  */
9989       mode = TYPE_MODE (type);
9990       if (!SCALAR_FLOAT_MODE_P (mode))
9993       if (*modep == VOIDmode)
/* Complex element: classify by its component mode.  */
10002       mode = TYPE_MODE (TREE_TYPE (type));
10003       if (!SCALAR_FLOAT_MODE_P (mode))
10006       if (*modep == VOIDmode)
10009       if (*modep == mode)
/* Vector element: requires the AltiVec ABI and hardware.  */
10015       if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10018       /* Use V4SImode as representative of all 128-bit vector types.  */
10019       size = int_size_in_bytes (type);
10029       if (*modep == VOIDmode)
10032       /* Vector modes are considered to be opaque: two vectors are
10033 	 equivalent for the purposes of being homogeneous aggregates
10034 	 if they are the same size.  */
10035       if (*modep == mode)
/* ARRAY_TYPE: multiply the element count by the array extent.  */
10043 	tree index = TYPE_DOMAIN (type);
10045 	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
10047 	if (!COMPLETE_TYPE_P (type)
10048 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10051 	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10054 	    || !TYPE_MAX_VALUE (index)
10055 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10056 	    || !TYPE_MIN_VALUE (index)
10057 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10061 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10062 		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10064 	/* There must be no padding.  */
10065 	if (wi::to_wide (TYPE_SIZE (type))
10066 	    != count * GET_MODE_BITSIZE (*modep))
/* RECORD_TYPE: sum the counts of all FIELD_DECLs.  */
10078 	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
10080 	if (!COMPLETE_TYPE_P (type)
10081 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10084 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10086 	    if (TREE_CODE (field) != FIELD_DECL)
10089 	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10092 	    count += sub_count;
10095 	/* There must be no padding.  */
10096 	if (wi::to_wide (TYPE_SIZE (type))
10097 	    != count * GET_MODE_BITSIZE (*modep))
/* UNION_TYPE / QUAL_UNION_TYPE: the count is the max over members.  */
10104     case QUAL_UNION_TYPE:
10106 	/* These aren't very interesting except in a degenerate case.  */
10111 	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
10113 	if (!COMPLETE_TYPE_P (type)
10114 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10117 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10119 	    if (TREE_CODE (field) != FIELD_DECL)
10122 	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10125 	    count = count > sub_count ? count : sub_count;
10128 	/* There must be no padding.  */
10129 	if (wi::to_wide (TYPE_SIZE (type))
10130 	    != count * GET_MODE_BITSIZE (*modep))
10143 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10144    float or vector aggregate that shall be passed in FP/vector registers
10145    according to the ELFv2 ABI, return the homogeneous element mode in
10146    *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10148    Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE.
     NOTE(review): the fallback assignments and return statements are
     elided in this extract.  */
10151 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10152 				       machine_mode *elt_mode,
10155   /* Note that we do not accept complex types at the top level as
10156      homogeneous aggregates; these types are handled via the
10157      targetm.calls.split_complex_arg mechanism.  Complex types
10158      can be elements of homogeneous aggregates, however.  */
10159   if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10160       && AGGREGATE_TYPE_P (type))
10162       machine_mode field_mode = VOIDmode;
10163       int field_count = rs6000_aggregate_candidate (type, &field_mode);
10165       if (field_count > 0)
/* Vector elements consume a 16-byte register, scalars an 8-byte one.  */
10167 	  int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10168 	  int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10170 	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
10171 	     up to AGGR_ARG_NUM_REG registers.  */
10172 	  if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10175 	      *elt_mode = field_mode;
10177 	      *n_elts = field_count;
10190 /* Return a nonzero value to say to return the function value in
10191    memory, just as large structures are always returned.  TYPE will be
10192    the data type of the value, and FNTYPE will be the type of the
10193    function doing the returning, or @code{NULL} for libcalls.
10195    The AIX ABI for the RS/6000 specifies that all structures are
10196    returned in memory.  The Darwin ABI does the same.
10198    For the Darwin 64 Bit ABI, a function result can be returned in
10199    registers or in memory, depending on the size of the return data
10200    type.  If it is returned in registers, the value occupies the same
10201    registers as it would if it were the first and only function
10202    argument.  Otherwise, the function places its result in memory at
10203    the location pointed to by GPR3.
10205    The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10206    but a draft put them in memory, and GCC used to implement the draft
10207    instead of the final standard.  Therefore, aix_struct_return
10208    controls this instead of DEFAULT_ABI; V.4 targets needing backward
10209    compatibility can change DRAFT_V4_STRUCT_RET to override the
10210    default, and -m switches get the final word.  See
10211    rs6000_option_override_internal for more details.
10213    The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10214    long double support is enabled.  These values are returned in memory.
10216    int_size_in_bytes returns -1 for variable size objects, which go in
10217    memory always.  The cast to unsigned makes -1 > 8.
     NOTE(review): several return statements are elided in this extract.  */
10220 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10222   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
10224       && rs6000_darwin64_abi
10225       && TREE_CODE (type) == RECORD_TYPE
10226       && int_size_in_bytes (type) > 0)
10228       CUMULATIVE_ARGS valcum;
10232       valcum.fregno = FP_ARG_MIN_REG;
10233       valcum.vregno = ALTIVEC_ARG_MIN_REG;
10234       /* Do a trial code generation as if this were going to be passed
10235 	 as an argument; if any part goes in memory, we return NULL.  */
10236       valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10239       /* Otherwise fall through to more conventional ABI rules.  */
10242   /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
10243   if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10247   /* The ELFv2 ABI returns aggregates up to 16B in registers.  */
10248   if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10249       && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
/* AIX-style struct return, or SVR4 structs larger than 8 bytes.  */
10252   if (AGGREGATE_TYPE_P (type)
10253       && (aix_struct_return
10254 	  || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10257   /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
10258      modes only exist for GCC vector types if -maltivec.  */
10259   if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10260       && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10263   /* Return synthetic vectors in memory.  */
10264   if (TREE_CODE (type) == VECTOR_TYPE
10265       && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10267       static bool warned_for_return_big_vectors = false;
10268       if (!warned_for_return_big_vectors)
10270 	  warning (OPT_Wpsabi, "GCC vector returned by reference: "
10271 		   "non-standard ABI extension with no compatibility "
10273 	  warned_for_return_big_vectors = true;
/* 32-bit SVR4 with IEEE quad long double: returned in memory.  */
10278   if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10279       && FLOAT128_IEEE_P (TYPE_MODE (type)))
10285 /* Specify whether values returned in registers should be at the most
10286    significant end of a register.  We want aggregates returned by
10287    value to match the way aggregates are passed to functions.
     True only for big-endian ELFv2 aggregates whose argument padding is
     upward.  NOTE(review): the comparison target of the padding test is
     elided in this extract.  */
10290 rs6000_return_in_msb (const_tree valtype)
10292   return (DEFAULT_ABI == ABI_ELFv2
10293 	  && BYTES_BIG_ENDIAN
10294 	  && AGGREGATE_TYPE_P (valtype)
10295 	  && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10299 #ifdef HAVE_AS_GNU_ATTRIBUTE
10300 /* Return TRUE if a call to function FNDECL may be one that
10301    potentially affects the function calling ABI of the object file.
     Used to decide whether .gnu_attribute flags must be recorded.  */
10304 call_ABI_of_interest (tree fndecl)
/* Only meaningful while RTL expansion is in progress.  */
10306   if (rs6000_gnu_attr && symtab->state == EXPANSION)
10308       struct cgraph_node *c_node;
10310       /* Libcalls are always interesting.  */
10311       if (fndecl == NULL_TREE)
10314       /* Any call to an external function is interesting.  */
10315       if (DECL_EXTERNAL (fndecl))
10318       /* Interesting functions that we are emitting in this object file.  */
10319       c_node = cgraph_node::get (fndecl);
10320       c_node = c_node->ultimate_alias_target ();
10321       return !c_node->only_called_directly_p ();
10327 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10328    for a call to a function whose data type is FNTYPE.
10329    For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10331    For incoming args we set the number of arguments in the prototype large
10332    so we never return a PARALLEL.
     NOTE(review): interior lines (fndecl parameter, several condition
     heads and braces) are elided in this extract.  */
10335 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10336 		      rtx libname ATTRIBUTE_UNUSED, int incoming,
10337 		      int libcall, int n_named_args,
10339 		      machine_mode return_mode ATTRIBUTE_UNUSED)
10341   static CUMULATIVE_ARGS zero_cumulative;
/* Reset all fields, then fill in the starting register numbers.  */
10343   *cum = zero_cumulative;
10345   cum->fregno = FP_ARG_MIN_REG;
10346   cum->vregno = ALTIVEC_ARG_MIN_REG;
10347   cum->prototype = (fntype && prototype_p (fntype));
10348   cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10349 		      ? CALL_LIBCALL : CALL_NORMAL);
10350   cum->sysv_gregno = GP_ARG_MIN_REG;
10351   cum->stdarg = stdarg_p (fntype);
10352   cum->libcall = libcall;
10354   cum->nargs_prototype = 0;
10355   if (incoming || cum->prototype)
10356     cum->nargs_prototype = n_named_args;
10358   /* Check for a longcall attribute.  */
10359   if ((!fntype && rs6000_default_long_calls)
10361 	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10362 	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10363     cum->call_cookie |= CALL_LONG;
10364   else if (DEFAULT_ABI != ABI_DARWIN)
10366       bool is_local = (fndecl
10367 		       && !DECL_EXTERNAL (fndecl)
10368 		       && !DECL_WEAK (fndecl)
10369 		       && (*targetm.binds_local_p) (fndecl));
/* "noplt" on the type also forces a long call.  */
10375 	       && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10376 	cum->call_cookie |= CALL_LONG;
/* ...as does an explicit "plt" attribute in the elided condition.  */
10381 		   && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10382 	cum->call_cookie |= CALL_LONG;
/* Optional debug dump, gated on -mdebug=arg.  */
10386   if (TARGET_DEBUG_ARG)
10388       fprintf (stderr, "\ninit_cumulative_args:");
10391 	  tree ret_type = TREE_TYPE (fntype);
10392 	  fprintf (stderr, " ret code = %s,",
10393 		   get_tree_code_name (TREE_CODE (ret_type)));
10396       if (cum->call_cookie & CALL_LONG)
10397 	fprintf (stderr, " longcall,");
10399       fprintf (stderr, " proto = %d, nargs = %d\n",
10400 	       cum->prototype, cum->nargs_prototype);
10403 #ifdef HAVE_AS_GNU_ATTRIBUTE
/* Record .gnu_attribute information about the return value.  */
10404   if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10406       cum->escapes = call_ABI_of_interest (fndecl);
10413 	      return_type = TREE_TYPE (fntype);
10414 	      return_mode = TYPE_MODE (return_type);
10417 	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10419 	  if (return_type != NULL)
/* Transparent unions are classified by their first field.  */
10421 	      if (TREE_CODE (return_type) == RECORD_TYPE
10422 		  && TYPE_TRANSPARENT_AGGR (return_type))
10424 		  return_type = TREE_TYPE (first_field (return_type));
10425 		  return_mode = TYPE_MODE (return_type);
10427 	      if (AGGREGATE_TYPE_P (return_type)
10428 		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10430 		rs6000_returns_struct = true;
10432 	      if (SCALAR_FLOAT_MODE_P (return_mode))
10434 		  rs6000_passes_float = true;
10435 		  if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10436 		      && (FLOAT128_IBM_P (return_mode)
10437 			  || FLOAT128_IEEE_P (return_mode)
10438 			  || (return_type != NULL
10439 			      && (TYPE_MAIN_VARIANT (return_type)
10440 				  == long_double_type_node))))
10441 		    rs6000_passes_long_double = true;
10443 		  /* Note if we passed or return a IEEE 128-bit type.  We changed
10444 		     the mangling for these types, and we may need to make an alias
10445 		     with the old mangling.  */
10446 		  if (FLOAT128_IEEE_P (return_mode))
10447 		    rs6000_passes_ieee128 = true;
10449 	      if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10450 		rs6000_passes_vector = true;
/* Reject vector returns when AltiVec insns are disabled.  */
10457       && TARGET_ALTIVEC_ABI
10458       && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10460       error ("cannot return value in vector register because"
10461 	     " altivec instructions are disabled, use %qs"
10462 	     " to enable them", "-maltivec");
10466 /* The mode the ABI uses for a word.  This is not the same as word_mode
10467    for -m32 -mpowerpc64.  This is used to implement various target hooks.  */
10469 static scalar_int_mode
10470 rs6000_abi_word_mode (void)
10472   return TARGET_32BIT ? SImode : DImode;
10475 /* Implement the TARGET_OFFLOAD_OPTIONS hook.
     Returns a freshly-allocated option string telling the offload
     compiler which pointer-size ABI to use.  NOTE(review): the
     TARGET_64BIT condition separating the two returns is elided in
     this extract.  */
10477 rs6000_offload_options (void)
10480     return xstrdup ("-foffload-abi=lp64");
10482     return xstrdup ("-foffload-abi=ilp32");
10485 /* On rs6000, function arguments are promoted, as are function return
     values.  Implements TARGET_PROMOTE_FUNCTION_MODE via PROMOTE_MODE;
     NOTE(review): the mode parameter and return statement are elided in
     this extract.  */
10488 static machine_mode
10489 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10491 			      int *punsignedp ATTRIBUTE_UNUSED,
10494   PROMOTE_MODE (mode, *punsignedp, type);
10499 /* Return true if TYPE must be passed on the stack and not in registers. */
10502 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10504 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10505 return must_pass_in_stack_var_size (mode, type);
10507 return must_pass_in_stack_var_size_or_pad (mode, type);
10511 is_complex_IBM_long_double (machine_mode mode)
10513 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10516 /* Whether ABI_V4 passes MODE args to a function in floating point
10520 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10522 if (!TARGET_HARD_FLOAT)
10524 if (mode == DFmode)
10526 if (mode == SFmode && named)
10528 /* ABI_V4 passes complex IBM long double in 8 gprs.
10529 Stupid, but we can't change the ABI now. */
10530 if (is_complex_IBM_long_double (mode))
10532 if (FLOAT128_2REG_P (mode))
10534 if (DECIMAL_FLOAT_MODE_P (mode))
10539 /* Implement TARGET_FUNCTION_ARG_PADDING.
10541 For the AIX ABI structs are always stored left shifted in their
10544 static pad_direction
10545 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10547 #ifndef AGGREGATE_PADDING_FIXED
10548 #define AGGREGATE_PADDING_FIXED 0
10550 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10551 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10554 if (!AGGREGATE_PADDING_FIXED)
10556 /* GCC used to pass structures of the same size as integer types as
10557 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10558 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10559 passed padded downward, except that -mstrict-align further
10560 muddied the water in that multi-component structures of 2 and 4
10561 bytes in size were passed padded upward.
10563 The following arranges for best compatibility with previous
10564 versions of gcc, but removes the -mstrict-align dependency. */
10565 if (BYTES_BIG_ENDIAN)
10567 HOST_WIDE_INT size = 0;
10569 if (mode == BLKmode)
10571 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10572 size = int_size_in_bytes (type);
10575 size = GET_MODE_SIZE (mode);
10577 if (size == 1 || size == 2 || size == 4)
10578 return PAD_DOWNWARD;
10583 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10585 if (type != 0 && AGGREGATE_TYPE_P (type))
10589 /* Fall back to the default. */
10590 return default_function_arg_padding (mode, type);
10593 /* If defined, a C expression that gives the alignment boundary, in bits,
10594 of an argument with the specified mode and type. If it is not defined,
10595 PARM_BOUNDARY is used for all arguments.
10597 V.4 wants long longs and doubles to be double word aligned. Just
10598 testing the mode size is a boneheaded way to do this as it means
10599 that other types such as complex int are also double word aligned.
10600 However, we're stuck with this because changing the ABI might break
10601 existing library interfaces.
10603 Quadword align Altivec/VSX vectors.
10604 Quadword align large synthetic vector types. */
10606 static unsigned int
10607 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10609 machine_mode elt_mode;
10612 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10614 if (DEFAULT_ABI == ABI_V4
10615 && (GET_MODE_SIZE (mode) == 8
10616 || (TARGET_HARD_FLOAT
10617 && !is_complex_IBM_long_double (mode)
10618 && FLOAT128_2REG_P (mode))))
10620 else if (FLOAT128_VECTOR_P (mode))
10622 else if (type && TREE_CODE (type) == VECTOR_TYPE
10623 && int_size_in_bytes (type) >= 8
10624 && int_size_in_bytes (type) < 16)
10626 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10627 || (type && TREE_CODE (type) == VECTOR_TYPE
10628 && int_size_in_bytes (type) >= 16))
10631 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10632 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10633 -mcompat-align-parm is used. */
10634 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10635 || DEFAULT_ABI == ABI_ELFv2)
10636 && type && TYPE_ALIGN (type) > 64)
10638 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10639 or homogeneous float/vector aggregates here. We already handled
10640 vector aggregates above, but still need to check for float here. */
10641 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10642 && !SCALAR_FLOAT_MODE_P (elt_mode));
10644 /* We used to check for BLKmode instead of the above aggregate type
10645 check. Warn when this results in any difference to the ABI. */
10646 if (aggregate_p != (mode == BLKmode))
10648 static bool warned;
10649 if (!warned && warn_psabi)
10652 inform (input_location,
10653 "the ABI of passing aggregates with %d-byte alignment"
10654 " has changed in GCC 5",
10655 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10663 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10664 implement the "aggregate type" check as a BLKmode check here; this
10665 means certain aggregate types are in fact not aligned. */
10666 if (TARGET_MACHO && rs6000_darwin64_abi
10668 && type && TYPE_ALIGN (type) > 64)
10671 return PARM_BOUNDARY;
10674 /* The offset in words to the start of the parameter save area. */
10676 static unsigned int
10677 rs6000_parm_offset (void)
10679 return (DEFAULT_ABI == ABI_V4 ? 2
10680 : DEFAULT_ABI == ABI_ELFv2 ? 4
10684 /* For a function parm of MODE and TYPE, return the starting word in
10685 the parameter area. NWORDS of the parameter area are already used. */
10687 static unsigned int
10688 rs6000_parm_start (machine_mode mode, const_tree type,
10689 unsigned int nwords)
10691 unsigned int align;
10693 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10694 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10697 /* Compute the size (in words) of a function argument. */
10699 static unsigned long
10700 rs6000_arg_size (machine_mode mode, const_tree type)
10702 unsigned long size;
10704 if (mode != BLKmode)
10705 size = GET_MODE_SIZE (mode);
10707 size = int_size_in_bytes (type);
10710 return (size + 3) >> 2;
10712 return (size + 7) >> 3;
10715 /* Use this to flush pending int fields. */
10718 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10719 HOST_WIDE_INT bitpos, int final)
10721 unsigned int startbit, endbit;
10722 int intregs, intoffset;
10724 /* Handle the situations where a float is taking up the first half
10725 of the GPR, and the other half is empty (typically due to
10726 alignment restrictions). We can detect this by a 8-byte-aligned
10727 int field, or by seeing that this is the final flush for this
10728 argument. Count the word and continue on. */
10729 if (cum->floats_in_gpr == 1
10730 && (cum->intoffset % 64 == 0
10731 || (cum->intoffset == -1 && final)))
10734 cum->floats_in_gpr = 0;
10737 if (cum->intoffset == -1)
10740 intoffset = cum->intoffset;
10741 cum->intoffset = -1;
10742 cum->floats_in_gpr = 0;
10744 if (intoffset % BITS_PER_WORD != 0)
10746 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
10747 if (!int_mode_for_size (bits, 0).exists ())
10749 /* We couldn't find an appropriate mode, which happens,
10750 e.g., in packed structs when there are 3 bytes to load.
10751 Back intoffset back to the beginning of the word in this
10753 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10757 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10758 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10759 intregs = (endbit - startbit) / BITS_PER_WORD;
10760 cum->words += intregs;
10761 /* words should be unsigned. */
10762 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10764 int pad = (endbit/BITS_PER_WORD) - cum->words;
10769 /* The darwin64 ABI calls for us to recurse down through structs,
10770 looking for elements passed in registers. Unfortunately, we have
10771 to track int register count here also because of misalignments
10772 in powerpc alignment mode. */
10775 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10777 HOST_WIDE_INT startbitpos)
10781 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10782 if (TREE_CODE (f) == FIELD_DECL)
10784 HOST_WIDE_INT bitpos = startbitpos;
10785 tree ftype = TREE_TYPE (f);
10787 if (ftype == error_mark_node)
10789 mode = TYPE_MODE (ftype);
10791 if (DECL_SIZE (f) != 0
10792 && tree_fits_uhwi_p (bit_position (f)))
10793 bitpos += int_bit_position (f);
10795 /* ??? FIXME: else assume zero offset. */
10797 if (TREE_CODE (ftype) == RECORD_TYPE)
10798 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10799 else if (USE_FP_FOR_ARG_P (cum, mode))
10801 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10802 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10803 cum->fregno += n_fpregs;
10804 /* Single-precision floats present a special problem for
10805 us, because they are smaller than an 8-byte GPR, and so
10806 the structure-packing rules combined with the standard
10807 varargs behavior mean that we want to pack float/float
10808 and float/int combinations into a single register's
10809 space. This is complicated by the arg advance flushing,
10810 which works on arbitrarily large groups of int-type
10812 if (mode == SFmode)
10814 if (cum->floats_in_gpr == 1)
10816 /* Two floats in a word; count the word and reset
10817 the float count. */
10819 cum->floats_in_gpr = 0;
10821 else if (bitpos % 64 == 0)
10823 /* A float at the beginning of an 8-byte word;
10824 count it and put off adjusting cum->words until
10825 we see if a arg advance flush is going to do it
10827 cum->floats_in_gpr++;
10831 /* The float is at the end of a word, preceded
10832 by integer fields, so the arg advance flush
10833 just above has already set cum->words and
10834 everything is taken care of. */
10838 cum->words += n_fpregs;
10840 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10842 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10846 else if (cum->intoffset == -1)
10847 cum->intoffset = bitpos;
10851 /* Check for an item that needs to be considered specially under the darwin 64
10852 bit ABI. These are record types where the mode is BLK or the structure is
10853 8 bytes in size. */
10855 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10857 return rs6000_darwin64_abi
10858 && ((mode == BLKmode
10859 && TREE_CODE (type) == RECORD_TYPE
10860 && int_size_in_bytes (type) > 0)
10861 || (type && TREE_CODE (type) == RECORD_TYPE
10862 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10865 /* Update the data in CUM to advance over an argument
10866 of mode MODE and data type TYPE.
10867 (TYPE is null for libcalls where that information may not be available.)
10869 Note that for args passed by reference, function_arg will be called
10870 with MODE and TYPE set to that of the pointer to the arg, not the arg
10874 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10875 const_tree type, bool named, int depth)
10877 machine_mode elt_mode;
10880 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10882 /* Only tick off an argument if we're not recursing. */
10884 cum->nargs_prototype--;
10886 #ifdef HAVE_AS_GNU_ATTRIBUTE
10887 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
10890 if (SCALAR_FLOAT_MODE_P (mode))
10892 rs6000_passes_float = true;
10893 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10894 && (FLOAT128_IBM_P (mode)
10895 || FLOAT128_IEEE_P (mode)
10897 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
10898 rs6000_passes_long_double = true;
10900 /* Note if we passed or return a IEEE 128-bit type. We changed the
10901 mangling for these types, and we may need to make an alias with
10902 the old mangling. */
10903 if (FLOAT128_IEEE_P (mode))
10904 rs6000_passes_ieee128 = true;
10906 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10907 rs6000_passes_vector = true;
10911 if (TARGET_ALTIVEC_ABI
10912 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10913 || (type && TREE_CODE (type) == VECTOR_TYPE
10914 && int_size_in_bytes (type) == 16)))
10916 bool stack = false;
10918 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10920 cum->vregno += n_elts;
10922 if (!TARGET_ALTIVEC)
10923 error ("cannot pass argument in vector register because"
10924 " altivec instructions are disabled, use %qs"
10925 " to enable them", "-maltivec");
10927 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10928 even if it is going to be passed in a vector register.
10929 Darwin does the same for variable-argument functions. */
10930 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10932 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10942 /* Vector parameters must be 16-byte aligned. In 32-bit
10943 mode this means we need to take into account the offset
10944 to the parameter save area. In 64-bit mode, they just
10945 have to start on an even word, since the parameter save
10946 area is 16-byte aligned. */
10948 align = -(rs6000_parm_offset () + cum->words) & 3;
10950 align = cum->words & 1;
10951 cum->words += align + rs6000_arg_size (mode, type);
10953 if (TARGET_DEBUG_ARG)
10955 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10956 cum->words, align);
10957 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10958 cum->nargs_prototype, cum->prototype,
10959 GET_MODE_NAME (mode));
10963 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10965 int size = int_size_in_bytes (type);
10966 /* Variable sized types have size == -1 and are
10967 treated as if consisting entirely of ints.
10968 Pad to 16 byte boundary if needed. */
10969 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10970 && (cum->words % 2) != 0)
10972 /* For varargs, we can just go up by the size of the struct. */
10974 cum->words += (size + 7) / 8;
10977 /* It is tempting to say int register count just goes up by
10978 sizeof(type)/8, but this is wrong in a case such as
10979 { int; double; int; } [powerpc alignment]. We have to
10980 grovel through the fields for these too. */
10981 cum->intoffset = 0;
10982 cum->floats_in_gpr = 0;
10983 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10984 rs6000_darwin64_record_arg_advance_flush (cum,
10985 size * BITS_PER_UNIT, 1);
10987 if (TARGET_DEBUG_ARG)
10989 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10990 cum->words, TYPE_ALIGN (type), size);
10992 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10993 cum->nargs_prototype, cum->prototype,
10994 GET_MODE_NAME (mode));
10997 else if (DEFAULT_ABI == ABI_V4)
10999 if (abi_v4_pass_in_fpr (mode, named))
11001 /* _Decimal128 must use an even/odd register pair. This assumes
11002 that the register number is odd when fregno is odd. */
11003 if (mode == TDmode && (cum->fregno % 2) == 1)
11006 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11007 <= FP_ARG_V4_MAX_REG)
11008 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11011 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11012 if (mode == DFmode || FLOAT128_IBM_P (mode)
11013 || mode == DDmode || mode == TDmode)
11014 cum->words += cum->words & 1;
11015 cum->words += rs6000_arg_size (mode, type);
11020 int n_words = rs6000_arg_size (mode, type);
11021 int gregno = cum->sysv_gregno;
11023 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11024 As does any other 2 word item such as complex int due to a
11025 historical mistake. */
11027 gregno += (1 - gregno) & 1;
11029 /* Multi-reg args are not split between registers and stack. */
11030 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11032 /* Long long is aligned on the stack. So are other 2 word
11033 items such as complex int due to a historical mistake. */
11035 cum->words += cum->words & 1;
11036 cum->words += n_words;
11039 /* Note: continuing to accumulate gregno past when we've started
11040 spilling to the stack indicates the fact that we've started
11041 spilling to the stack to expand_builtin_saveregs. */
11042 cum->sysv_gregno = gregno + n_words;
11045 if (TARGET_DEBUG_ARG)
11047 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11048 cum->words, cum->fregno);
11049 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11050 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11051 fprintf (stderr, "mode = %4s, named = %d\n",
11052 GET_MODE_NAME (mode), named);
11057 int n_words = rs6000_arg_size (mode, type);
11058 int start_words = cum->words;
11059 int align_words = rs6000_parm_start (mode, type, start_words);
11061 cum->words = align_words + n_words;
11063 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11065 /* _Decimal128 must be passed in an even/odd float register pair.
11066 This assumes that the register number is odd when fregno is
11068 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11070 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11073 if (TARGET_DEBUG_ARG)
11075 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11076 cum->words, cum->fregno);
11077 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11078 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11079 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11080 named, align_words - start_words, depth);
11086 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11087 const_tree type, bool named)
11089 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11093 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11094 structure between cum->intoffset and bitpos to integer registers. */
11097 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11098 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11101 unsigned int regno;
11102 unsigned int startbit, endbit;
11103 int this_regno, intregs, intoffset;
11106 if (cum->intoffset == -1)
11109 intoffset = cum->intoffset;
11110 cum->intoffset = -1;
11112 /* If this is the trailing part of a word, try to only load that
11113 much into the register. Otherwise load the whole register. Note
11114 that in the latter case we may pick up unwanted bits. It's not a
11115 problem at the moment but may wish to revisit. */
11117 if (intoffset % BITS_PER_WORD != 0)
11119 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11120 if (!int_mode_for_size (bits, 0).exists (&mode))
11122 /* We couldn't find an appropriate mode, which happens,
11123 e.g., in packed structs when there are 3 bytes to load.
11124 Back intoffset back to the beginning of the word in this
11126 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11133 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11134 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11135 intregs = (endbit - startbit) / BITS_PER_WORD;
11136 this_regno = cum->words + intoffset / BITS_PER_WORD;
11138 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11139 cum->use_stack = 1;
11141 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11145 intoffset /= BITS_PER_UNIT;
11148 regno = GP_ARG_MIN_REG + this_regno;
11149 reg = gen_rtx_REG (mode, regno);
11151 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11154 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11158 while (intregs > 0);
11161 /* Recursive workhorse for the following. */
11164 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11165 HOST_WIDE_INT startbitpos, rtx rvec[],
11170 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11171 if (TREE_CODE (f) == FIELD_DECL)
11173 HOST_WIDE_INT bitpos = startbitpos;
11174 tree ftype = TREE_TYPE (f);
11176 if (ftype == error_mark_node)
11178 mode = TYPE_MODE (ftype);
11180 if (DECL_SIZE (f) != 0
11181 && tree_fits_uhwi_p (bit_position (f)))
11182 bitpos += int_bit_position (f);
11184 /* ??? FIXME: else assume zero offset. */
11186 if (TREE_CODE (ftype) == RECORD_TYPE)
11187 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11188 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11190 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11194 case E_SCmode: mode = SFmode; break;
11195 case E_DCmode: mode = DFmode; break;
11196 case E_TCmode: mode = TFmode; break;
11200 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11201 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11203 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11204 && (mode == TFmode || mode == TDmode));
11205 /* Long double or _Decimal128 split over regs and memory. */
11206 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11210 = gen_rtx_EXPR_LIST (VOIDmode,
11211 gen_rtx_REG (mode, cum->fregno++),
11212 GEN_INT (bitpos / BITS_PER_UNIT));
11213 if (FLOAT128_2REG_P (mode))
11216 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11218 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11220 = gen_rtx_EXPR_LIST (VOIDmode,
11221 gen_rtx_REG (mode, cum->vregno++),
11222 GEN_INT (bitpos / BITS_PER_UNIT));
11224 else if (cum->intoffset == -1)
11225 cum->intoffset = bitpos;
11229 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11230 the register(s) to be used for each field and subfield of a struct
11231 being passed by value, along with the offset of where the
11232 register's value may be found in the block. FP fields go in FP
11233 register, vector fields go in vector registers, and everything
11234 else goes in int registers, packed as in memory.
11236 This code is also used for function return values. RETVAL indicates
11237 whether this is the case.
11239 Much of this is taken from the SPARC V9 port, which has a similar
11240 calling convention. */
11243 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11244 bool named, bool retval)
11246 rtx rvec[FIRST_PSEUDO_REGISTER];
11247 int k = 1, kbase = 1;
11248 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11249 /* This is a copy; modifications are not visible to our caller. */
11250 CUMULATIVE_ARGS copy_cum = *orig_cum;
11251 CUMULATIVE_ARGS *cum = ©_cum;
11253 /* Pad to 16 byte boundary if needed. */
11254 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11255 && (cum->words % 2) != 0)
11258 cum->intoffset = 0;
11259 cum->use_stack = 0;
11260 cum->named = named;
11262 /* Put entries into rvec[] for individual FP and vector fields, and
11263 for the chunks of memory that go in int regs. Note we start at
11264 element 1; 0 is reserved for an indication of using memory, and
11265 may or may not be filled in below. */
11266 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11267 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11269 /* If any part of the struct went on the stack put all of it there.
11270 This hack is because the generic code for
11271 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11272 parts of the struct are not at the beginning. */
11273 if (cum->use_stack)
11276 return NULL_RTX; /* doesn't go in registers at all */
11278 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11280 if (k > 1 || cum->use_stack)
11281 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11286 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11289 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11294 rtx rvec[GP_ARG_NUM_REG + 1];
11296 if (align_words >= GP_ARG_NUM_REG)
11299 n_units = rs6000_arg_size (mode, type);
11301 /* Optimize the simple case where the arg fits in one gpr, except in
11302 the case of BLKmode due to assign_parms assuming that registers are
11303 BITS_PER_WORD wide. */
11305 || (n_units == 1 && mode != BLKmode))
11306 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11309 if (align_words + n_units > GP_ARG_NUM_REG)
11310 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11311 using a magic NULL_RTX component.
11312 This is not strictly correct. Only some of the arg belongs in
11313 memory, not all of it. However, the normal scheme using
11314 function_arg_partial_nregs can result in unusual subregs, eg.
11315 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11316 store the whole arg to memory is often more efficient than code
11317 to store pieces, and we know that space is available in the right
11318 place for the whole arg. */
11319 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11324 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11325 rtx off = GEN_INT (i++ * 4);
11326 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11328 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11330 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11333 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11334 but must also be copied into the parameter save area starting at
11335 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11336 to the GPRs and/or memory. Return the number of elements used. */
11339 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11340 int align_words, rtx *rvec)
11344 if (align_words < GP_ARG_NUM_REG)
11346 int n_words = rs6000_arg_size (mode, type);
11348 if (align_words + n_words > GP_ARG_NUM_REG
11350 || (TARGET_32BIT && TARGET_POWERPC64))
11352 /* If this is partially on the stack, then we only
11353 include the portion actually in registers here. */
11354 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11357 if (align_words + n_words > GP_ARG_NUM_REG)
11359 /* Not all of the arg fits in gprs. Say that it goes in memory
11360 too, using a magic NULL_RTX component. Also see comment in
11361 rs6000_mixed_function_arg for why the normal
11362 function_arg_partial_nregs scheme doesn't work in this case. */
11363 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11368 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11369 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11370 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11372 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11376 /* The whole arg fits in gprs. */
11377 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11378 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11383 /* It's entirely in memory. */
11384 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11390 /* RVEC is a vector of K components of an argument of mode MODE.
11391 Construct the final function_arg return value from it. */
11394 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11396 gcc_assert (k >= 1);
11398 /* Avoid returning a PARALLEL in the trivial cases. */
11401 if (XEXP (rvec[0], 0) == NULL_RTX)
11404 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11405 return XEXP (rvec[0], 0);
11408 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11411 /* Determine where to put an argument to a function.
11412 Value is zero to push the argument on the stack,
11413 or a hard register in which to store the argument.
11415 MODE is the argument's machine mode.
11416 TYPE is the data type of the argument (as a tree).
11417 This is null for libcalls where that information may
11419 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11420 the preceding args and about the function being called. It is
11421 not modified in this routine.
11422 NAMED is nonzero if this argument is a named parameter
11423 (otherwise it is an extra parameter matching an ellipsis).
11425 On RS/6000 the first eight words of non-FP are normally in registers
11426 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11427 Under V.4, the first 8 FP args are in registers.
11429 If this is floating-point and no prototype is specified, we use
11430 both an FP and integer register (or possibly FP reg and stack). Library
11431 functions (when CALL_LIBCALL is set) always have the proper types for args,
11432 so we can pass the FP value just in one register. emit_library_function
11433 doesn't support PARALLEL anyway.
11435 Note that for args passed by reference, function_arg will be called
11436 with MODE and TYPE set to that of the pointer to the arg, not the arg
11440 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11441 const_tree type, bool named)
11443 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11444 enum rs6000_abi abi = DEFAULT_ABI;
11445 machine_mode elt_mode;
11448 /* Return a marker to indicate whether CR1 needs to set or clear the
11449 bit that V.4 uses to say fp args were passed in registers.
11450 Assume that we don't need the marker for software floating point,
11451 or compiler generated library calls. */
11452 if (mode == VOIDmode)
11455 && (cum->call_cookie & CALL_LIBCALL) == 0
11457 || (cum->nargs_prototype < 0
11458 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11459 && TARGET_HARD_FLOAT)
11460 return GEN_INT (cum->call_cookie
11461 | ((cum->fregno == FP_ARG_MIN_REG)
11462 ? CALL_V4_SET_FP_ARGS
11463 : CALL_V4_CLEAR_FP_ARGS));
11465 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11468 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11470 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11472 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11473 if (rslt != NULL_RTX)
11475 /* Else fall through to usual handling. */
11478 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11480 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11484 /* Do we also need to pass this argument in the parameter save area?
11485 Library support functions for IEEE 128-bit are assumed to not need the
11486 value passed both in GPRs and in vector registers. */
11487 if (TARGET_64BIT && !cum->prototype
11488 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11490 int align_words = ROUND_UP (cum->words, 2);
11491 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11494 /* Describe where this argument goes in the vector registers. */
11495 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11497 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11498 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11499 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11502 return rs6000_finish_function_arg (mode, rvec, k);
11504 else if (TARGET_ALTIVEC_ABI
11505 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11506 || (type && TREE_CODE (type) == VECTOR_TYPE
11507 && int_size_in_bytes (type) == 16)))
11509 if (named || abi == ABI_V4)
11513 /* Vector parameters to varargs functions under AIX or Darwin
11514 get passed in memory and possibly also in GPRs. */
11515 int align, align_words, n_words;
11516 machine_mode part_mode;
11518 /* Vector parameters must be 16-byte aligned. In 32-bit
11519 mode this means we need to take into account the offset
11520 to the parameter save area. In 64-bit mode, they just
11521 have to start on an even word, since the parameter save
11522 area is 16-byte aligned. */
11524 align = -(rs6000_parm_offset () + cum->words) & 3;
11526 align = cum->words & 1;
11527 align_words = cum->words + align;
11529 /* Out of registers? Memory, then. */
11530 if (align_words >= GP_ARG_NUM_REG)
11533 if (TARGET_32BIT && TARGET_POWERPC64)
11534 return rs6000_mixed_function_arg (mode, type, align_words);
11536 /* The vector value goes in GPRs. Only the part of the
11537 value in GPRs is reported here. */
11539 n_words = rs6000_arg_size (mode, type);
11540 if (align_words + n_words > GP_ARG_NUM_REG)
11541 /* Fortunately, there are only two possibilities, the value
11542 is either wholly in GPRs or half in GPRs and half not. */
11543 part_mode = DImode;
11545 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11549 else if (abi == ABI_V4)
11551 if (abi_v4_pass_in_fpr (mode, named))
11553 /* _Decimal128 must use an even/odd register pair. This assumes
11554 that the register number is odd when fregno is odd. */
11555 if (mode == TDmode && (cum->fregno % 2) == 1)
11558 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11559 <= FP_ARG_V4_MAX_REG)
11560 return gen_rtx_REG (mode, cum->fregno);
11566 int n_words = rs6000_arg_size (mode, type);
11567 int gregno = cum->sysv_gregno;
11569 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11570 As does any other 2 word item such as complex int due to a
11571 historical mistake. */
11573 gregno += (1 - gregno) & 1;
11575 /* Multi-reg args are not split between registers and stack. */
11576 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11579 if (TARGET_32BIT && TARGET_POWERPC64)
11580 return rs6000_mixed_function_arg (mode, type,
11581 gregno - GP_ARG_MIN_REG);
11582 return gen_rtx_REG (mode, gregno);
11587 int align_words = rs6000_parm_start (mode, type, cum->words);
11589 /* _Decimal128 must be passed in an even/odd float register pair.
11590 This assumes that the register number is odd when fregno is odd. */
11591 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11594 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11595 && !(TARGET_AIX && !TARGET_ELF
11596 && type != NULL && AGGREGATE_TYPE_P (type)))
11598 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11601 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11604 /* Do we also need to pass this argument in the parameter
11606 if (type && (cum->nargs_prototype <= 0
11607 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11608 && TARGET_XL_COMPAT
11609 && align_words >= GP_ARG_NUM_REG)))
11610 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11612 /* Describe where this argument goes in the fprs. */
11613 for (i = 0; i < n_elts
11614 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11616 /* Check if the argument is split over registers and memory.
11617 This can only ever happen for long double or _Decimal128;
11618 complex types are handled via split_complex_arg. */
11619 machine_mode fmode = elt_mode;
11620 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11622 gcc_assert (FLOAT128_2REG_P (fmode));
11623 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11626 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11627 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11628 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11631 /* If there were not enough FPRs to hold the argument, the rest
11632 usually goes into memory. However, if the current position
11633 is still within the register parameter area, a portion may
11634 actually have to go into GPRs.
11636 Note that it may happen that the portion of the argument
11637 passed in the first "half" of the first GPR was already
11638 passed in the last FPR as well.
11640 For unnamed arguments, we already set up GPRs to cover the
11641 whole argument in rs6000_psave_function_arg, so there is
11642 nothing further to do at this point. */
11643 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11644 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11645 && cum->nargs_prototype > 0)
11647 static bool warned;
11649 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11650 int n_words = rs6000_arg_size (mode, type);
11652 align_words += fpr_words;
11653 n_words -= fpr_words;
11657 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11658 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11659 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11661 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11663 if (!warned && warn_psabi)
11666 inform (input_location,
11667 "the ABI of passing homogeneous %<float%> aggregates"
11668 " has changed in GCC 5");
11672 return rs6000_finish_function_arg (mode, rvec, k);
11674 else if (align_words < GP_ARG_NUM_REG)
11676 if (TARGET_32BIT && TARGET_POWERPC64)
11677 return rs6000_mixed_function_arg (mode, type, align_words);
11679 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11686 /* For an arg passed partly in registers and partly in memory, this is
11687 the number of bytes passed in registers. For args passed entirely in
11688 registers or entirely in memory, zero. When an arg is described by a
11689 PARALLEL, perhaps using more than one register type, this function
11690 returns the number of bytes used by the first element of the PARALLEL. */
11693 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11694 tree type, bool named)
/* NOTE(review): this extract elides some original lines (the return type,
   braces, and the declarations of `ret', `n_elts' and `align_words');
   the leading numbers are the original file's line numbers.  */
11696 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11697 bool passed_in_gprs = true;
11700 machine_mode elt_mode;
/* ELT_MODE/N_ELTS are filled in by the helper; presumably the element
   mode and count for a homogeneous aggregate, else MODE itself --
   TODO confirm against rs6000_discover_homogeneous_aggregate.  */
11703 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
/* The V4 (SVR4) case is handled first; its body is elided here.  */
11705 if (DEFAULT_ABI == ABI_V4)
11708 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11710 /* If we are passing this arg in the fixed parameter save area (gprs or
11711 memory) as well as VRs, we do not use the partial bytes mechanism;
11712 instead, rs6000_function_arg will return a PARALLEL including a memory
11713 element as necessary. Library support functions for IEEE 128-bit are
11714 assumed to not need the value passed both in GPRs and in vector
11716 if (TARGET_64BIT && !cum->prototype
11717 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11720 /* Otherwise, we pass in VRs only. Check for partial copies. */
11721 passed_in_gprs = false;
/* Each vector register covers 16 bytes, hence the scale factor.  */
11722 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11723 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11726 /* In this complicated case we just disable the partial_nregs code. */
11727 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11730 align_words = rs6000_parm_start (mode, type, cum->words);
11732 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11733 && !(TARGET_AIX && !TARGET_ELF
11734 && type != NULL && AGGREGATE_TYPE_P (type)))
11736 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11738 /* If we are passing this arg in the fixed parameter save area
11739 (gprs or memory) as well as FPRs, we do not use the partial
11740 bytes mechanism; instead, rs6000_function_arg will return a
11741 PARALLEL including a memory element as necessary. */
11743 && (cum->nargs_prototype <= 0
11744 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11745 && TARGET_XL_COMPAT
11746 && align_words >= GP_ARG_NUM_REG)))
11749 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11750 passed_in_gprs = false;
11751 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11753 /* Compute number of bytes / words passed in FPRs. If there
11754 is still space available in the register parameter area
11755 *after* that amount, a part of the argument will be passed
11756 in GPRs. In that case, the total amount passed in any
11757 registers is equal to the amount that would have been passed
11758 in GPRs if everything were passed there, so we fall back to
11759 the GPR code below to compute the appropriate value. */
11760 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11761 * MIN (8, GET_MODE_SIZE (elt_mode)));
11762 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11764 if (align_words + fpr_words < GP_ARG_NUM_REG)
11765 passed_in_gprs = true;
/* Generic GPR case: partial bytes are whatever still fits in the
   register parameter area, at 4 bytes per GPR on 32-bit targets and
   8 bytes per GPR on 64-bit targets.  */
11772 && align_words < GP_ARG_NUM_REG
11773 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11774 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11776 if (ret != 0 && TARGET_DEBUG_ARG)
11777 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11782 /* A C expression that indicates when an argument must be passed by
11783 reference. If nonzero for an argument, a copy of that argument is
11784 made in memory and a pointer to the argument is passed instead of
11785 the argument itself. The pointer is passed in whatever way is
11786 appropriate for passing a pointer to that type.
11788 Under V.4, aggregates and long double are passed by reference.
11790 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11791 reference unless the AltiVec vector extension ABI is in force.
11793 As an extension to all ABIs, variable sized types are passed by
11797 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11798 machine_mode mode, const_tree type,
11799 bool named ATTRIBUTE_UNUSED)
/* NOTE(review): the extract elides the return statements after each
   debug fprintf; leading numbers are the original file's line numbers.
   Each `if' below identifies one category that is passed by reference
   (see the block comment preceding this function).  */
11804 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11805 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11807 if (TARGET_DEBUG_ARG)
11808 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11812 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11814 if (TARGET_DEBUG_ARG)
11815 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
/* int_size_in_bytes returns a negative value for variable-sized types;
   those are always passed by reference.  */
11819 if (int_size_in_bytes (type) < 0)
11821 if (TARGET_DEBUG_ARG)
11822 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11826 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11827 modes only exist for GCC vector types if -maltivec. */
11828 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11830 if (TARGET_DEBUG_ARG)
11831 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11835 /* Pass synthetic vectors in memory. */
11836 if (TREE_CODE (type) == VECTOR_TYPE
11837 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
/* Warn only once per compilation, not once per argument.  */
11839 static bool warned_for_pass_big_vectors = false;
11840 if (TARGET_DEBUG_ARG)
11841 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11842 if (!warned_for_pass_big_vectors)
11844 warning (OPT_Wpsabi, "GCC vector passed by reference: "
11845 "non-standard ABI extension with no compatibility "
11847 warned_for_pass_big_vectors = true;
11855 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11856 already processed. Return true if the parameter must be passed
11857 (fully or partially) on the stack. */
11860 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
/* NOTE(review): the extract elides the return values after each early
   test; leading numbers are the original file's line numbers.  This
   function has a side effect: it advances ARGS_SO_FAR past the
   parameter (see the rs6000_function_arg_advance call at the end).  */
11866 /* Catch errors. */
11867 if (type == NULL || type == error_mark_node)
11870 /* Handle types with no storage requirement. */
11871 if (TYPE_MODE (type) == VOIDmode)
11874 /* Handle complex types. */
11875 if (TREE_CODE (type) == COMPLEX_TYPE)
/* The two identical-looking operands below are deliberate, not a
   copy-paste error: each recursive call advances ARGS_SO_FAR for one
   component of the complex value, so the pair accounts for both the
   real and imaginary parts.  */
11876 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11877 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11879 /* Handle transparent aggregates. */
11880 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11881 && TYPE_TRANSPARENT_AGGR (type))
11882 type = TREE_TYPE (first_field (type));
11884 /* See if this arg was passed by invisible reference. */
11885 if (pass_by_reference (get_cumulative_args (args_so_far),
11886 TYPE_MODE (type), type, true))
11887 type = build_pointer_type (type);
11889 /* Find mode as it is passed by the ABI. */
11890 unsignedp = TYPE_UNSIGNED (type);
11891 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11893 /* If we must pass in stack, we need a stack. */
11894 if (rs6000_must_pass_in_stack (mode, type))
11897 /* If there is no incoming register, we need a stack. */
11898 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11899 if (entry_parm == NULL)
11902 /* Likewise if we need to pass both in registers and on the stack. */
11903 if (GET_CODE (entry_parm) == PARALLEL
11904 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11907 /* Also true if we're partially in registers and partially not. */
11908 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11911 /* Update info on where next arg arrives in registers. */
11912 rs6000_function_arg_advance (args_so_far, mode, type, true);
11916 /* Return true if FUN has no prototype, has a variable argument
11917 list, or passes any parameter in memory. */
11920 rs6000_function_parms_need_stack (tree fun, bool incoming)
/* NOTE(review): the extract elides several lines (the early `if (!fun)'
   branch and the return statements); leading numbers are the original
   file's line numbers.  */
11922 tree fntype, result;
11923 CUMULATIVE_ARGS args_so_far_v;
11924 cumulative_args_t args_so_far;
11927 /* Must be a libcall, all of which only use reg parms. */
11932 fntype = TREE_TYPE (fun);
11934 /* Varargs functions need the parameter save area. */
11935 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11938 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11939 args_so_far = pack_cumulative_args (&args_so_far_v);
11941 /* When incoming, we will have been passed the function decl.
11942 It is necessary to use the decl to handle K&R style functions,
11943 where TYPE_ARG_TYPES may not be available. */
11946 gcc_assert (DECL_P (fun));
11947 result = DECL_RESULT (fun);
11950 result = TREE_TYPE (fntype);
/* An aggregate return value is passed via a hidden pointer argument,
   so let it consume the first parameter slot before scanning the
   real parameters.  */
11952 if (result && aggregate_value_p (result, fntype))
11954 if (!TYPE_P (result))
11955 result = TREE_TYPE (result);
11956 result = build_pointer_type (result);
11957 rs6000_parm_needs_stack (args_so_far, result);
/* Incoming path: walk DECL_ARGUMENTS of the decl (works for K&R).  */
11964 for (parm = DECL_ARGUMENTS (fun);
11965 parm && parm != void_list_node;
11966 parm = TREE_CHAIN (parm))
11967 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
/* Outgoing path: walk the prototype's argument types instead.  */
11972 function_args_iterator args_iter;
11975 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11976 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11983 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11984 usually a constant depending on the ABI. However, in the ELFv2 ABI
11985 the register parameter area is optional when calling a function that
11986 has a prototype in scope, has no variable argument list, and passes
11987 all parameters in registers. */
11990 rs6000_reg_parm_stack_space (tree fun, bool incoming)
/* NOTE(review): the extract elides the `case'/`default' labels of the
   switch and the braces; leading numbers are the original file's line
   numbers.  The 64/32 constants correspond to the 8 parameter GPRs
   times 8 bytes (64-bit) or 4 bytes (32-bit).  */
11992 int reg_parm_stack_space;
11994 switch (DEFAULT_ABI)
11997 reg_parm_stack_space = 0;
12002 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12006 /* ??? Recomputing this every time is a bit expensive. Is there
12007 a place to cache this information? */
/* ELFv2: the save area is only required when some parameter actually
   needs stack space (see rs6000_function_parms_need_stack).  */
12008 if (rs6000_function_parms_need_stack (fun, incoming))
12009 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12011 reg_parm_stack_space = 0;
12015 return reg_parm_stack_space;
/* Emit insns that store NREGS consecutive word-sized hard registers,
   starting at REGNO, into memory X, one word (SImode on 32-bit,
   DImode on 64-bit) at a time.  NOTE(review): the extract elides some
   lines (braces and the declaration of `i'); leading numbers are the
   original file's line numbers.  */
12019 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12022 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12027 for (i = 0; i < nregs; i++)
12029 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
/* After reload we may no longer create pseudo-based addresses, so fall
   back to a subreg or force the address to its canonical form.  */
12030 if (reload_completed)
12032 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12035 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12036 i * GET_MODE_SIZE (reg_mode));
12039 tem = replace_equiv_address (tem, XEXP (tem, 0));
12043 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12047 /* Perform any actions needed for a function that is receiving a
12048 variable number of arguments.
12052 MODE and TYPE are the mode and type of the current parameter.
12054 PRETEND_SIZE is a variable that should be set to the amount of stack
12055 that must be pushed by the prolog to pretend that our caller pushed
12058 Normally, this macro will push all remaining incoming registers on the
12059 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* Target hook body: spill the anonymous (unnamed) incoming argument
   registers to memory so va_arg can find them.  NOTE(review): the
   extract elides many lines (braces, `else' keywords, parts of some
   statements); leading numbers are the original file's line numbers.  */
12062 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12063 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12066 CUMULATIVE_ARGS next_cum;
12067 int reg_size = TARGET_32BIT ? 4 : 8;
12068 rtx save_area = NULL_RTX, mem;
12069 int first_reg_offset;
12070 alias_set_type set;
12072 /* Skip the last named argument. */
12073 next_cum = *get_cumulative_args (cum);
12074 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12076 if (DEFAULT_ABI == ABI_V4)
/* V4: anonymous args start right after the last named GPR arg.  */
12078 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12082 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12083 HOST_WIDE_INT offset = 0;
12085 /* Try to optimize the size of the varargs save area.
12086 The ABI requires that ap.reg_save_area is doubleword
12087 aligned, but we don't need to allocate space for all
12088 the bytes, only those to which we actually will save
12090 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12091 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12092 if (TARGET_HARD_FLOAT
12093 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12094 && cfun->va_list_fpr_size)
12097 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12098 * UNITS_PER_FP_WORD;
12099 if (cfun->va_list_fpr_size
12100 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12101 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12103 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12104 * UNITS_PER_FP_WORD;
/* Keep the GPR save area doubleword aligned (& ~7 below).  */
12108 offset = -((first_reg_offset * reg_size) & ~7);
12109 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12111 gpr_reg_num = cfun->va_list_gpr_size;
12112 if (reg_size == 4 && (first_reg_offset & 1))
12115 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12118 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12119 * UNITS_PER_FP_WORD
12120 - (int) (GP_ARG_NUM_REG * reg_size);
12122 if (gpr_size + fpr_size)
12125 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12126 gcc_assert (MEM_P (reg_save_area));
12127 reg_save_area = XEXP (reg_save_area, 0);
/* assign_stack_local returns an address of the form
   (plus virtual_stack_vars_rtx const) or virtual_stack_vars_rtx
   itself; fold any constant term into OFFSET.  */
12128 if (GET_CODE (reg_save_area) == PLUS)
12130 gcc_assert (XEXP (reg_save_area, 0)
12131 == virtual_stack_vars_rtx)
12132 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12133 offset += INTVAL (XEXP (reg_save_area, 1));
12136 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
/* Remember where the save area lives so rs6000_va_start can point
   ap.reg_save_area at it.  */
12139 cfun->machine->varargs_save_offset = offset;
12140 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
/* Non-V4 ABIs: spill into the caller-provided parameter save area.  */
12145 first_reg_offset = next_cum.words;
12146 save_area = crtl->args.internal_arg_pointer;
12148 if (targetm.calls.must_pass_in_stack (mode, type))
12149 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12152 set = get_varargs_alias_set ();
12153 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12154 && cfun->va_list_gpr_size)
12156 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12158 if (va_list_gpr_counter_field)
12159 /* V4 va_list_gpr_size counts number of registers needed. */
12160 n_gpr = cfun->va_list_gpr_size;
12162 /* char * va_list instead counts number of bytes needed. */
12163 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12168 mem = gen_rtx_MEM (BLKmode,
12169 plus_constant (Pmode, save_area,
12170 first_reg_offset * reg_size));
12171 MEM_NOTRAP_P (mem) = 1;
12172 set_mem_alias_set (mem, set);
12173 set_mem_align (mem, BITS_PER_WORD);
12175 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12179 /* Save FP registers if needed. */
12180 if (DEFAULT_ABI == ABI_V4
12181 && TARGET_HARD_FLOAT
12183 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12184 && cfun->va_list_fpr_size)
12186 int fregno = next_cum.fregno, nregs;
/* The V4 calling convention uses CR bit 6 (in CR1 here) to tell the
   callee whether FP args were passed; branch around the FPR stores
   when it is clear -- NOTE(review): the gen_rtx_NE operand lines are
   partially elided, confirm against the original file.  */
12187 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12188 rtx lab = gen_label_rtx ();
12189 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12190 * UNITS_PER_FP_WORD);
12193 (gen_rtx_SET (pc_rtx,
12194 gen_rtx_IF_THEN_ELSE (VOIDmode,
12195 gen_rtx_NE (VOIDmode, cr1,
12197 gen_rtx_LABEL_REF (VOIDmode, lab),
12201 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12202 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12204 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12205 plus_constant (Pmode, save_area, off));
12206 MEM_NOTRAP_P (mem) = 1;
12207 set_mem_alias_set (mem, set);
12208 set_mem_align (mem, GET_MODE_ALIGNMENT (
12209 TARGET_HARD_FLOAT ? DFmode : SFmode));
12210 emit_move_insn (mem, gen_rtx_REG (
12211 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12218 /* Create the va_list data type. */
/* Build the V4 va_list record type:
     struct __va_list_tag {
       unsigned char gpr, fpr;       counters of used arg registers
       unsigned short reserved;      explicit padding
       void *overflow_arg_area, *reg_save_area;
     };
   Non-V4 ABIs use a plain `char *'.  NOTE(review): the extract elides
   a few lines (e.g. the pointer types of the last two fields); leading
   numbers are the original file's line numbers.  */
12221 rs6000_build_builtin_va_list (void)
12223 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12225 /* For AIX, prefer 'char *' because that's what the system
12226 header files like. */
12227 if (DEFAULT_ABI != ABI_V4)
12228 return build_pointer_type (char_type_node);
12230 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12231 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12232 get_identifier ("__va_list_tag"), record);
12234 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12235 unsigned_char_type_node);
12236 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12237 unsigned_char_type_node);
12238 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12239 every user file. */
12240 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12241 get_identifier ("reserved"), short_unsigned_type_node);
12242 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12243 get_identifier ("overflow_arg_area"),
12245 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12246 get_identifier ("reg_save_area"),
/* Export the counter fields so optimizations (and setup_incoming_varargs)
   can reason about how much of the save area is live.  */
12249 va_list_gpr_counter_field = f_gpr;
12250 va_list_fpr_counter_field = f_fpr;
12252 DECL_FIELD_CONTEXT (f_gpr) = record;
12253 DECL_FIELD_CONTEXT (f_fpr) = record;
12254 DECL_FIELD_CONTEXT (f_res) = record;
12255 DECL_FIELD_CONTEXT (f_ovf) = record;
12256 DECL_FIELD_CONTEXT (f_sav) = record;
12258 TYPE_STUB_DECL (record) = type_decl;
12259 TYPE_NAME (record) = type_decl;
12260 TYPE_FIELDS (record) = f_gpr;
12261 DECL_CHAIN (f_gpr) = f_fpr;
12262 DECL_CHAIN (f_fpr) = f_res;
12263 DECL_CHAIN (f_res) = f_ovf;
12264 DECL_CHAIN (f_ovf) = f_sav;
12266 layout_type (record);
12268 /* The correct type is an array type of one element. */
12269 return build_array_type (record, build_index_type (size_zero_node));
12272 /* Implement va_start. */
12275 rs6000_va_start (tree valist, rtx nextarg)
/* Initialize the V4 va_list record built by
   rs6000_build_builtin_va_list: set the gpr/fpr counters, the
   overflow area pointer, and the register save area pointer.
   NOTE(review): the extract elides some lines (second arguments of
   the MIN calls, some COMPONENT_REF operands); leading numbers are
   the original file's line numbers.  */
12277 HOST_WIDE_INT words, n_gpr, n_fpr;
12278 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12279 tree gpr, fpr, ovf, sav, t;
12281 /* Only SVR4 needs something special. */
12282 if (DEFAULT_ABI != ABI_V4)
12284 std_expand_builtin_va_start (valist, nextarg);
/* Field decls, in the same chain order they were created in
   rs6000_build_builtin_va_list.  */
12288 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12289 f_fpr = DECL_CHAIN (f_gpr);
12290 f_res = DECL_CHAIN (f_fpr);
12291 f_ovf = DECL_CHAIN (f_res);
12292 f_sav = DECL_CHAIN (f_ovf);
12294 valist = build_simple_mem_ref (valist);
12295 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12296 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12298 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12300 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12303 /* Count number of gp and fp argument registers used. */
12304 words = crtl->args.info.words;
12305 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12307 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12310 if (TARGET_DEBUG_ARG)
12311 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12312 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12313 words, n_gpr, n_fpr);
/* Only emit the counter stores when some va_arg actually reads them.  */
12315 if (cfun->va_list_gpr_size)
12317 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12318 build_int_cst (NULL_TREE, n_gpr));
12319 TREE_SIDE_EFFECTS (t) = 1;
12320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12323 if (cfun->va_list_fpr_size)
12325 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12326 build_int_cst (NULL_TREE, n_fpr));
12327 TREE_SIDE_EFFECTS (t) = 1;
12328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12330 #ifdef HAVE_AS_GNU_ATTRIBUTE
/* Record FP usage for the .gnu_attribute ABI tag emitted later.  */
12331 if (call_ABI_of_interest (cfun->decl))
12332 rs6000_passes_float = true;
12336 /* Find the overflow area. */
12337 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12339 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12340 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12341 TREE_SIDE_EFFECTS (t) = 1;
12342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12344 /* If there were no va_arg invocations, don't set up the register
12346 if (!cfun->va_list_gpr_size
12347 && !cfun->va_list_fpr_size
12348 && n_gpr < GP_ARG_NUM_REG
12349 && n_fpr < FP_ARG_V4_MAX_REG)
12352 /* Find the register save area. */
12353 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12354 if (cfun->machine->varargs_save_offset)
12355 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12356 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12357 TREE_SIDE_EFFECTS (t) = 1;
12358 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12361 /* Implement va_arg. */
12364 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12365 gimple_seq *post_p)
/* Expand va_arg(VALIST, TYPE) into GIMPLE appended to PRE_P/POST_P and
   return the value expression.  NOTE(review): the extract elides many
   lines (braces, `else' arms, several assignments such as those to
   `reg', `align', `regalign' and `stmt'); leading numbers are the
   original file's line numbers.  */
12367 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12368 tree gpr, fpr, ovf, sav, reg, t, u;
12369 int size, rsize, n_reg, sav_ofs, sav_scale;
12370 tree lab_false, lab_over, addr;
12372 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
/* Pass-by-reference args: fetch the pointer, then dereference it.  */
12376 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12378 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12379 return build_va_arg_indirect_ref (t);
12382 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12383 earlier version of gcc, with the property that it always applied alignment
12384 adjustments to the va-args (even for zero-sized types). The cheapest way
12385 to deal with this is to replicate the effect of the part of
12386 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12388 We don't need to check for pass-by-reference because of the test above.
12389 We can return a simplified answer, since we know there's no offset to add. */
12392 && rs6000_darwin64_abi)
12393 || DEFAULT_ABI == ABI_ELFv2
12394 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12395 && integer_zerop (TYPE_SIZE (type)))
12397 unsigned HOST_WIDE_INT align, boundary;
12398 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12399 align = PARM_BOUNDARY / BITS_PER_UNIT;
12400 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12401 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12402 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12403 boundary /= BITS_PER_UNIT;
12404 if (boundary > align)
12407 /* This updates arg ptr by the amount that would be necessary
12408 to align the zero-sized (but not zero-alignment) item. */
12409 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12410 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12411 gimplify_and_add (t, pre_p);
12413 t = fold_convert (sizetype, valist_tmp);
12414 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12415 fold_convert (TREE_TYPE (valist),
12416 fold_build2 (BIT_AND_EXPR, sizetype, t,
12417 size_int (-boundary))));
12418 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12419 gimplify_and_add (t, pre_p);
12421 /* Since it is zero-sized there's no increment for the item itself. */
12422 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12423 return build_va_arg_indirect_ref (valist_tmp);
/* Non-V4 ABIs can mostly use the generic expansion, except that split
   complex args must be reassembled from two separate va_arg fetches.  */
12426 if (DEFAULT_ABI != ABI_V4)
12428 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12430 tree elem_type = TREE_TYPE (type);
12431 machine_mode elem_mode = TYPE_MODE (elem_type);
12432 int elem_size = GET_MODE_SIZE (elem_mode);
12434 if (elem_size < UNITS_PER_WORD)
12436 tree real_part, imag_part;
12437 gimple_seq post = NULL;
12439 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12441 /* Copy the value into a temporary, lest the formal temporary
12442 be reused out from under us. */
12443 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12444 gimple_seq_add_seq (pre_p, post);
12446 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12449 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12453 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* V4 path: pick the value out of the register save area or the
   overflow area, mirroring rs6000_build_builtin_va_list's layout.  */
12456 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12457 f_fpr = DECL_CHAIN (f_gpr);
12458 f_res = DECL_CHAIN (f_fpr);
12459 f_ovf = DECL_CHAIN (f_res);
12460 f_sav = DECL_CHAIN (f_ovf);
12462 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12463 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12465 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12467 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12470 size = int_size_in_bytes (type);
/* RSIZE is the size rounded up to whole 4-byte words; PAD is the
   right-alignment slack within the last word.  */
12471 rsize = (size + 3) / 4;
12472 int pad = 4 * rsize - size;
12475 machine_mode mode = TYPE_MODE (type);
12476 if (abi_v4_pass_in_fpr (mode, false))
12478 /* FP args go in FP registers, if present. */
12480 n_reg = (size + 7) / 8;
12481 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12482 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12483 if (mode != SFmode && mode != SDmode)
12488 /* Otherwise into GP registers. */
12497 /* Pull the value out of the saved registers.... */
12500 addr = create_tmp_var (ptr_type_node, "addr");
12502 /* AltiVec vectors never go in registers when -mabi=altivec. */
12503 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12507 lab_false = create_artificial_label (input_location);
12508 lab_over = create_artificial_label (input_location);
12510 /* Long long is aligned in the registers. As are any other 2 gpr
12511 item such as complex int due to a historical mistake. */
12513 if (n_reg == 2 && reg == gpr)
12516 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12517 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12518 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12519 unshare_expr (reg), u);
12521 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12522 reg number is 0 for f1, so we want to make it odd. */
12523 else if (reg == fpr && mode == TDmode)
12525 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12526 build_int_cst (TREE_TYPE (reg), 1));
12527 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
/* If fewer than N_REG argument registers remain, fall through to the
   overflow (stack) path at lab_false.  */
12530 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12531 t = build2 (GE_EXPR, boolean_type_node, u, t);
12532 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12533 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12534 gimplify_and_add (t, pre_p);
/* addr = sav + sav_ofs + (reg counter)++ * sav_scale.  */
12538 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12540 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12541 build_int_cst (TREE_TYPE (reg), n_reg));
12542 u = fold_convert (sizetype, u);
12543 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12544 t = fold_build_pointer_plus (t, u);
12546 /* _Decimal32 varargs are located in the second word of the 64-bit
12547 FP register for 32-bit binaries. */
12548 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12549 t = fold_build_pointer_plus_hwi (t, size);
12551 /* Args are passed right-aligned. */
12552 if (BYTES_BIG_ENDIAN)
12553 t = fold_build_pointer_plus_hwi (t, pad);
12555 gimplify_assign (addr, t, pre_p);
12557 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12559 stmt = gimple_build_label (lab_false);
12560 gimple_seq_add_stmt (pre_p, stmt);
12562 if ((n_reg == 2 && !regalign) || n_reg > 2)
12564 /* Ensure that we don't find any more args in regs.
12565 Alignment has been taken care of for the special cases. */
12566 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12570 /* ... otherwise out of the overflow area. */
12572 /* Care for on-stack alignment if needed. */
12576 t = fold_build_pointer_plus_hwi (t, align - 1);
12577 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12578 build_int_cst (TREE_TYPE (t), -align));
12581 /* Args are passed right-aligned. */
12582 if (BYTES_BIG_ENDIAN)
12583 t = fold_build_pointer_plus_hwi (t, pad);
12585 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12587 gimplify_assign (unshare_expr (addr), t, pre_p);
/* Bump the overflow pointer past the fetched value.  */
12589 t = fold_build_pointer_plus_hwi (t, size);
12590 gimplify_assign (unshare_expr (ovf), t, pre_p);
12594 stmt = gimple_build_label (lab_over);
12595 gimple_seq_add_stmt (pre_p, stmt);
12598 if (STRICT_ALIGNMENT
12599 && (TYPE_ALIGN (type)
12600 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12602 /* The value (of type complex double, for example) may not be
12603 aligned in memory in the saved registers, so copy via a
12604 temporary. (This is the same code as used for SPARC.) */
12605 tree tmp = create_tmp_var (type, "va_arg_tmp");
12606 tree dest_addr = build_fold_addr_expr (tmp);
12608 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12609 3, dest_addr, addr, size_int (rsize * 4));
12610 TREE_ADDRESSABLE (tmp) = 1;
12612 gimplify_and_add (copy, pre_p);
12616 addr = fold_convert (ptrtype, addr);
12617 return build_va_arg_indirect_ref (addr);
/* Register one rs6000 target builtin named NAME with function type TYPE
   and enum value CODE, recording the decl in rs6000_builtin_decls and
   applying const/pure/nothrow attributes from the rs6000_builtin_info
   classification table.  NOTE(review): the extract elides a few lines
   (the declaration of `t', braces, the `else' arms); leading numbers
   are the original file's line numbers.  */
12623 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12626 unsigned classify = rs6000_builtin_info[(int)code].attr;
12627 const char *attr_string = "";
12629 gcc_assert (name != NULL);
12630 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
/* Registering the same builtin twice indicates a table bug.  */
12632 if (rs6000_builtin_decls[(int)code])
12633 fatal_error (input_location,
12634 "internal error: builtin function %qs already processed",
12637 rs6000_builtin_decls[(int)code] = t =
12638 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12640 /* Set any special attributes. */
12641 if ((classify & RS6000_BTC_CONST) != 0)
12643 /* const function, function only depends on the inputs. */
12644 TREE_READONLY (t) = 1;
12645 TREE_NOTHROW (t) = 1;
12646 attr_string = ", const";
12648 else if ((classify & RS6000_BTC_PURE) != 0)
12650 /* pure function, function can read global memory, but does not set any
12652 DECL_PURE_P (t) = 1;
12653 TREE_NOTHROW (t) = 1;
12654 attr_string = ", pure";
12656 else if ((classify & RS6000_BTC_FP) != 0)
12658 /* Function is a math function. If rounding mode is on, then treat the
12659 function as not reading global memory, but it can have arbitrary side
12660 effects. If it is off, then assume the function is a const function.
12661 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12662 builtin-attribute.def that is used for the math functions. */
12663 TREE_NOTHROW (t) = 1;
12664 if (flag_rounding_math)
12666 DECL_PURE_P (t) = 1;
12667 DECL_IS_NOVOPS (t) = 1;
12668 attr_string = ", fp, pure";
12672 TREE_READONLY (t) = 1;
12673 attr_string = ", fp, const";
/* Any other attribute bit set here is a table inconsistency.  */
12676 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12677 gcc_unreachable ();
12679 if (TARGET_DEBUG_BUILTIN)
12680 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12681 (int)code, name, attr_string);
/* The bdesc_* description tables below are all built with the same X-macro
   technique: rs6000-builtin.def invokes one RS6000_BUILTIN_<kind> macro per
   builtin, so before each #include every macro is #undef'd and exactly one
   kind is redefined to emit a { MASK, ICODE, NAME, ENUM } initializer while
   the rest expand to nothing.  Each table therefore contains only the
   builtins of its kind.  */
12684 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12686 #undef RS6000_BUILTIN_0
12687 #undef RS6000_BUILTIN_1
12688 #undef RS6000_BUILTIN_2
12689 #undef RS6000_BUILTIN_3
12690 #undef RS6000_BUILTIN_A
12691 #undef RS6000_BUILTIN_D
12692 #undef RS6000_BUILTIN_H
12693 #undef RS6000_BUILTIN_P
12694 #undef RS6000_BUILTIN_X
12696 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12697 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12698 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12699 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12700 { MASK, ICODE, NAME, ENUM },
12702 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12703 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12704 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12705 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12706 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12708 static const struct builtin_description bdesc_3arg[] =
12710 #include "rs6000-builtin.def"
12713 /* DST operations: void foo (void *, const int, const char). */
12715 #undef RS6000_BUILTIN_0
12716 #undef RS6000_BUILTIN_1
12717 #undef RS6000_BUILTIN_2
12718 #undef RS6000_BUILTIN_3
12719 #undef RS6000_BUILTIN_A
12720 #undef RS6000_BUILTIN_D
12721 #undef RS6000_BUILTIN_H
12722 #undef RS6000_BUILTIN_P
12723 #undef RS6000_BUILTIN_X
12725 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12726 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12727 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12728 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12729 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12730 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12731 { MASK, ICODE, NAME, ENUM },
12733 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12734 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12735 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12737 static const struct builtin_description bdesc_dst[] =
12739 #include "rs6000-builtin.def"
12742 /* Simple binary operations: VECc = foo (VECa, VECb). */
12744 #undef RS6000_BUILTIN_0
12745 #undef RS6000_BUILTIN_1
12746 #undef RS6000_BUILTIN_2
12747 #undef RS6000_BUILTIN_3
12748 #undef RS6000_BUILTIN_A
12749 #undef RS6000_BUILTIN_D
12750 #undef RS6000_BUILTIN_H
12751 #undef RS6000_BUILTIN_P
12752 #undef RS6000_BUILTIN_X
12754 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12755 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12756 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12757 { MASK, ICODE, NAME, ENUM },
12759 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12760 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12761 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12762 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12763 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12764 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12766 static const struct builtin_description bdesc_2arg[] =
12768 #include "rs6000-builtin.def"
12771 #undef RS6000_BUILTIN_0
12772 #undef RS6000_BUILTIN_1
12773 #undef RS6000_BUILTIN_2
12774 #undef RS6000_BUILTIN_3
12775 #undef RS6000_BUILTIN_A
12776 #undef RS6000_BUILTIN_D
12777 #undef RS6000_BUILTIN_H
12778 #undef RS6000_BUILTIN_P
12779 #undef RS6000_BUILTIN_X
12781 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12782 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12783 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12784 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12785 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12786 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12787 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12788 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12789 { MASK, ICODE, NAME, ENUM },
12791 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12793 /* AltiVec predicates. */
12795 static const struct builtin_description bdesc_altivec_preds[] =
12797 #include "rs6000-builtin.def"
12800 /* ABS* operations. */
12802 #undef RS6000_BUILTIN_0
12803 #undef RS6000_BUILTIN_1
12804 #undef RS6000_BUILTIN_2
12805 #undef RS6000_BUILTIN_3
12806 #undef RS6000_BUILTIN_A
12807 #undef RS6000_BUILTIN_D
12808 #undef RS6000_BUILTIN_H
12809 #undef RS6000_BUILTIN_P
12810 #undef RS6000_BUILTIN_X
12812 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12813 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12814 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12815 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12816 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12817 { MASK, ICODE, NAME, ENUM },
12819 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12820 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12821 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12822 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12824 static const struct builtin_description bdesc_abs[] =
12826 #include "rs6000-builtin.def"
12829 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12832 #undef RS6000_BUILTIN_0
12833 #undef RS6000_BUILTIN_1
12834 #undef RS6000_BUILTIN_2
12835 #undef RS6000_BUILTIN_3
12836 #undef RS6000_BUILTIN_A
12837 #undef RS6000_BUILTIN_D
12838 #undef RS6000_BUILTIN_H
12839 #undef RS6000_BUILTIN_P
12840 #undef RS6000_BUILTIN_X
12842 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12843 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12844 { MASK, ICODE, NAME, ENUM },
12846 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12847 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12848 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12849 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12850 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12851 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12852 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12854 static const struct builtin_description bdesc_1arg[] =
12856 #include "rs6000-builtin.def"
12859 /* Simple no-argument operations: result = __builtin_darn_32 () */
12861 #undef RS6000_BUILTIN_0
12862 #undef RS6000_BUILTIN_1
12863 #undef RS6000_BUILTIN_2
12864 #undef RS6000_BUILTIN_3
12865 #undef RS6000_BUILTIN_A
12866 #undef RS6000_BUILTIN_D
12867 #undef RS6000_BUILTIN_H
12868 #undef RS6000_BUILTIN_P
12869 #undef RS6000_BUILTIN_X
12871 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
12872 { MASK, ICODE, NAME, ENUM },
12874 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12875 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12876 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12877 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12878 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12879 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12880 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12881 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12883 static const struct builtin_description bdesc_0arg[] =
12885 #include "rs6000-builtin.def"
12888 /* HTM builtins. */
12889 #undef RS6000_BUILTIN_0
12890 #undef RS6000_BUILTIN_1
12891 #undef RS6000_BUILTIN_2
12892 #undef RS6000_BUILTIN_3
12893 #undef RS6000_BUILTIN_A
12894 #undef RS6000_BUILTIN_D
12895 #undef RS6000_BUILTIN_H
12896 #undef RS6000_BUILTIN_P
12897 #undef RS6000_BUILTIN_X
12899 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12900 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12901 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12902 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12903 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12904 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12905 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12906 { MASK, ICODE, NAME, ENUM },
12908 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12909 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12911 static const struct builtin_description bdesc_htm[] =
12913 #include "rs6000-builtin.def"
/* Leave the RS6000_BUILTIN_* macros undefined after the last table so any
   stray later use is a compile-time error.  */
12916 #undef RS6000_BUILTIN_0
12917 #undef RS6000_BUILTIN_1
12918 #undef RS6000_BUILTIN_2
12919 #undef RS6000_BUILTIN_3
12920 #undef RS6000_BUILTIN_A
12921 #undef RS6000_BUILTIN_D
12922 #undef RS6000_BUILTIN_H
12923 #undef RS6000_BUILTIN_P
12925 /* Return true if a builtin function is overloaded. */
12927 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12929 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
/* Return the builtin's user-visible name from the info table.  */
12933 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
12935 return rs6000_builtin_info[(int)fncode].name;
12938 /* Expand an expression EXP that calls a builtin without arguments. */
12940 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12943 machine_mode tmode = insn_data[icode].operand[0].mode;
12945 if (icode == CODE_FOR_nothing)
12946 /* Builtin not supported on this processor. */
/* mffsl reads the FPSCR, which does not exist under soft-float.  */
12949 if (icode == CODE_FOR_rs6000_mffsl
12950 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12952 error ("%<__builtin_mffsl%> not supported with %<-msoft-float%>");
/* Reuse TARGET only if it has the right mode and satisfies the insn's
   output predicate; otherwise allocate a fresh pseudo.  */
12957 || GET_MODE (target) != tmode
12958 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12959 target = gen_reg_rtx (tmode);
12961 pat = GEN_FCN (icode) (target);
/* Expand a call to __builtin_mtfsf: EXP supplies the 8-bit field mask
   (argument 1, which must be a compile-time constant in 0..255) and the
   value to move into the FPSCR fields.  */
12971 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12974 tree arg0 = CALL_EXPR_ARG (exp, 0);
12975 tree arg1 = CALL_EXPR_ARG (exp, 1);
12976 rtx op0 = expand_normal (arg0);
12977 rtx op1 = expand_normal (arg1);
12978 machine_mode mode0 = insn_data[icode].operand[0].mode;
12979 machine_mode mode1 = insn_data[icode].operand[1].mode;
12981 if (icode == CODE_FOR_nothing)
12982 /* Builtin not supported on this processor. */
12985 /* If we got invalid arguments bail out before generating bad rtl. */
12986 if (arg0 == error_mark_node || arg1 == error_mark_node)
/* The FM field of mtfsf is 8 bits wide, so the mask must fit in 0..255.  */
12989 if (!CONST_INT_P (op0)
12990 || INTVAL (op0) > 255
12991 || INTVAL (op0) < 0)
12993 error ("argument 1 must be an 8-bit field value")
12997 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12998 op0 = copy_to_mode_reg (mode0, op0);
13000 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13001 op1 = copy_to_mode_reg (mode1, op1);
13003 pat = GEN_FCN (icode) (op0, op1);
/* Expand a call to __builtin_mtfsb0/__builtin_mtfsb1 (set/clear one FPSCR
   bit).  EXP supplies the bit number, which must be a constant 0..31;
   ICODE is the insn to emit.  Rejected under -msoft-float since there is
   no FPSCR.  */
13012 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
13015 tree arg0 = CALL_EXPR_ARG (exp, 0);
13016 rtx op0 = expand_normal (arg0);
13018 if (icode == CODE_FOR_nothing)
13019 /* Builtin not supported on this processor. */
13022 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13024 error ("%<__builtin_mtfsb0%> and %<__builtin_mtfsb1%> not supported with "
13025 "%<-msoft-float%>");
13029 /* If we got invalid arguments bail out before generating bad rtl. */
13030 if (arg0 == error_mark_node)
13033 /* Only allow bit numbers 0 to 31. */
13034 if (!u5bit_cint_operand (op0, VOIDmode))
/* GCC diagnostics start lowercase and carry no trailing period.  */
13036 error ("argument must be a constant between 0 and 31");
13040 pat = GEN_FCN (icode) (op0);
/* Expand a call to __builtin_set_fpscr_rn, which sets the binary
   rounding-mode (RN) field of the FPSCR from the low two bits of the
   argument.  Constant arguments are range-checked at compile time;
   variable arguments cannot be, and their extra bits are ignored.  */
13049 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
13052 tree arg0 = CALL_EXPR_ARG (exp, 0);
13053 rtx op0 = expand_normal (arg0);
13054 machine_mode mode0 = insn_data[icode].operand[0].mode;
13056 if (icode == CODE_FOR_nothing)
13057 /* Builtin not supported on this processor. */
13060 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13062 error ("%<__builtin_set_fpscr_rn%> not supported with %<-msoft-float%>");
13066 /* If we got invalid arguments bail out before generating bad rtl. */
13067 if (arg0 == error_mark_node)
13070 /* If the argument is a constant, check the range. Argument can only be a
13071 2-bit value. Unfortunately, can't check the range of the value at
13072 compile time if the argument is a variable. The least significant two
13073 bits of the argument, regardless of type, are used to set the rounding
13074 mode. All other bits are ignored. */
13075 if (CONST_INT_P (op0) && !const_0_to_3_operand (op0, VOIDmode))
/* GCC diagnostics start lowercase and carry no trailing period.  */
13077 error ("argument must be a value between 0 and 3");
13081 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13082 op0 = copy_to_mode_reg (mode0, op0);
13084 pat = GEN_FCN (icode) (op0);
/* Expand a call to __builtin_set_fpscr_drn, which sets the decimal
   rounding-mode (DRN) field of the FPSCR from the low three bits of the
   argument.  Only available in 64-bit mode and with hardware float.  */
13092 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13095 tree arg0 = CALL_EXPR_ARG (exp, 0);
13096 rtx op0 = expand_normal (arg0);
13097 machine_mode mode0 = insn_data[icode].operand[0].mode;
13100 /* Builtin not supported in 32-bit mode. */
13101 fatal_error (input_location,
13102 "%<__builtin_set_fpscr_drn%> is not supported "
13105 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13107 error ("%<__builtin_set_fpscr_drn%> not supported with %<-msoft-float%>");
13111 if (icode == CODE_FOR_nothing)
13112 /* Builtin not supported on this processor. */
13115 /* If we got invalid arguments bail out before generating bad rtl. */
13116 if (arg0 == error_mark_node)
13119 /* If the argument is a constant, check the range. Argument can only be a
13120 3-bit value. Unfortunately, can't check the range of the value at
13121 compile time if the argument is a variable. The least significant three
13122 bits of the argument, regardless of type, are used to set the rounding
13123 mode. All other bits are ignored. */
13124 if (CONST_INT_P (op0) && !const_0_to_7_operand (op0, VOIDmode))
/* GCC diagnostics start lowercase and carry no trailing period.  */
13126 error ("argument must be a value between 0 and 7");
13130 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13131 op0 = copy_to_mode_reg (mode0, op0);
13133 pat = GEN_FCN (icode) (op0);
/* Expand a one-operand builtin call EXP with insn ICODE, placing the
   result in TARGET (or a fresh pseudo if TARGET is unsuitable).  The
   vspltis* insns additionally require a 5-bit signed literal operand.  */
13142 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13145 tree arg0 = CALL_EXPR_ARG (exp, 0);
13146 rtx op0 = expand_normal (arg0);
13147 machine_mode tmode = insn_data[icode].operand[0].mode;
13148 machine_mode mode0 = insn_data[icode].operand[1].mode;
13150 if (icode == CODE_FOR_nothing)
13151 /* Builtin not supported on this processor. */
13154 /* If we got invalid arguments bail out before generating bad rtl. */
13155 if (arg0 == error_mark_node)
13158 if (icode == CODE_FOR_altivec_vspltisb
13159 || icode == CODE_FOR_altivec_vspltish
13160 || icode == CODE_FOR_altivec_vspltisw)
13162 /* Only allow 5-bit *signed* literals. */
13163 if (!CONST_INT_P (op0)
13164 || INTVAL (op0) > 15
13165 || INTVAL (op0) < -16)
13167 error ("argument 1 must be a 5-bit signed literal");
/* Return a dummy value so expansion can continue after the error.  */
13168 return CONST0_RTX (tmode);
13173 || GET_MODE (target) != tmode
13174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13175 target = gen_reg_rtx (tmode);
13177 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13178 op0 = copy_to_mode_reg (mode0, op0);
13180 pat = GEN_FCN (icode) (target, op0);
/* Expand an AltiVec ABS* builtin call EXP with insn ICODE into TARGET.
   The abs patterns take two scratch registers in addition to the input.  */
13189 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13191 rtx pat, scratch1, scratch2;
13192 tree arg0 = CALL_EXPR_ARG (exp, 0);
13193 rtx op0 = expand_normal (arg0);
13194 machine_mode tmode = insn_data[icode].operand[0].mode;
13195 machine_mode mode0 = insn_data[icode].operand[1].mode;
13197 /* If we have invalid arguments, bail out before generating bad rtl. */
13198 if (arg0 == error_mark_node)
13202 || GET_MODE (target) != tmode
13203 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13204 target = gen_reg_rtx (tmode);
13206 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13207 op0 = copy_to_mode_reg (mode0, op0);
/* Scratch operands required by the abs insn patterns.  */
13209 scratch1 = gen_reg_rtx (mode0);
13210 scratch2 = gen_reg_rtx (mode0);
13212 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
/* Expand a two-operand builtin call EXP with insn ICODE into TARGET.
   Several insn families require one argument to be a small unsigned
   literal; each case below enforces the exact bit-width its instruction
   encoding allows, erroring and returning a zero of the result mode so
   expansion can continue.  */
13221 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13224 tree arg0 = CALL_EXPR_ARG (exp, 0);
13225 tree arg1 = CALL_EXPR_ARG (exp, 1);
13226 rtx op0 = expand_normal (arg0);
13227 rtx op1 = expand_normal (arg1);
13228 machine_mode tmode = insn_data[icode].operand[0].mode;
13229 machine_mode mode0 = insn_data[icode].operand[1].mode;
13230 machine_mode mode1 = insn_data[icode].operand[2].mode;
13232 if (icode == CODE_FOR_nothing)
13233 /* Builtin not supported on this processor. */
13236 /* If we got invalid arguments bail out before generating bad rtl. */
13237 if (arg0 == error_mark_node || arg1 == error_mark_node)
13240 if (icode == CODE_FOR_unpackv1ti
13241 || icode == CODE_FOR_unpackkf
13242 || icode == CODE_FOR_unpacktf
13243 || icode == CODE_FOR_unpackif
13244 || icode == CODE_FOR_unpacktd)
13246 /* Only allow 1-bit unsigned literals. */
13248 if (TREE_CODE (arg1) != INTEGER_CST
13249 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13251 error ("argument 2 must be a 1-bit unsigned literal");
13252 return CONST0_RTX (tmode);
13255 else if (icode == CODE_FOR_altivec_vspltw)
13257 /* Only allow 2-bit unsigned literals. */
13259 if (TREE_CODE (arg1) != INTEGER_CST
13260 || TREE_INT_CST_LOW (arg1) & ~3)
13262 error ("argument 2 must be a 2-bit unsigned literal");
13263 return CONST0_RTX (tmode);
13266 else if (icode == CODE_FOR_altivec_vsplth)
13268 /* Only allow 3-bit unsigned literals. */
13270 if (TREE_CODE (arg1) != INTEGER_CST
13271 || TREE_INT_CST_LOW (arg1) & ~7)
13273 error ("argument 2 must be a 3-bit unsigned literal");
13274 return CONST0_RTX (tmode);
13277 else if (icode == CODE_FOR_altivec_vspltb)
13279 /* Only allow 4-bit unsigned literals. */
13281 if (TREE_CODE (arg1) != INTEGER_CST
13282 || TREE_INT_CST_LOW (arg1) & ~15)
13284 error ("argument 2 must be a 4-bit unsigned literal");
13285 return CONST0_RTX (tmode);
13288 else if (icode == CODE_FOR_altivec_vcfux
13289 || icode == CODE_FOR_altivec_vcfsx
13290 || icode == CODE_FOR_altivec_vctsxs
13291 || icode == CODE_FOR_altivec_vctuxs)
13293 /* Only allow 5-bit unsigned literals. */
13295 if (TREE_CODE (arg1) != INTEGER_CST
13296 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13298 error ("argument 2 must be a 5-bit unsigned literal");
13299 return CONST0_RTX (tmode);
/* The DFP test-significance insns constrain argument 1, not argument 2.  */
13302 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13303 || icode == CODE_FOR_dfptstsfi_lt_dd
13304 || icode == CODE_FOR_dfptstsfi_gt_dd
13305 || icode == CODE_FOR_dfptstsfi_unordered_dd
13306 || icode == CODE_FOR_dfptstsfi_eq_td
13307 || icode == CODE_FOR_dfptstsfi_lt_td
13308 || icode == CODE_FOR_dfptstsfi_gt_td
13309 || icode == CODE_FOR_dfptstsfi_unordered_td)
13311 /* Only allow 6-bit unsigned literals. */
13313 if (TREE_CODE (arg0) != INTEGER_CST
13314 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13316 error ("argument 1 must be a 6-bit unsigned literal");
13317 return CONST0_RTX (tmode);
13320 else if (icode == CODE_FOR_xststdcqp_kf
13321 || icode == CODE_FOR_xststdcqp_tf
13322 || icode == CODE_FOR_xststdcdp
13323 || icode == CODE_FOR_xststdcsp
13324 || icode == CODE_FOR_xvtstdcdp
13325 || icode == CODE_FOR_xvtstdcsp)
13327 /* Only allow 7-bit unsigned literals. */
13329 if (TREE_CODE (arg1) != INTEGER_CST
13330 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13332 error ("argument 2 must be a 7-bit unsigned literal");
13333 return CONST0_RTX (tmode);
13338 || GET_MODE (target) != tmode
13339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13340 target = gen_reg_rtx (tmode);
13342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13343 op0 = copy_to_mode_reg (mode0, op0);
13344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13345 op1 = copy_to_mode_reg (mode1, op1);
13347 pat = GEN_FCN (icode) (target, op0, op1);
/* Expand an AltiVec vec_any_*/vec_all_* predicate builtin call EXP with
   insn ICODE into TARGET.  Argument 0 selects which CR6 bit pattern to
   test (and must be a compile-time constant); arguments 1 and 2 are the
   vectors compared.  */
13356 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13359 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13360 tree arg0 = CALL_EXPR_ARG (exp, 1);
13361 tree arg1 = CALL_EXPR_ARG (exp, 2);
13362 rtx op0 = expand_normal (arg0);
13363 rtx op1 = expand_normal (arg1);
13364 machine_mode tmode = SImode;
13365 machine_mode mode0 = insn_data[icode].operand[1].mode;
13366 machine_mode mode1 = insn_data[icode].operand[2].mode;
13369 if (TREE_CODE (cr6_form) != INTEGER_CST)
13371 error ("argument 1 of %qs must be a constant",
13372 "__builtin_altivec_predicate");
13376 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13378 gcc_assert (mode0 == mode1);
13380 /* If we have invalid arguments, bail out before generating bad rtl. */
13381 if (arg0 == error_mark_node || arg1 == error_mark_node)
13385 || GET_MODE (target) != tmode
13386 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13387 target = gen_reg_rtx (tmode);
13389 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13390 op0 = copy_to_mode_reg (mode0, op0);
13391 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13392 op1 = copy_to_mode_reg (mode1, op1);
13394 /* Note that for many of the relevant operations (e.g. cmpne or
13395 cmpeq) with float or double operands, it makes more sense for the
13396 mode of the allocated scratch register to select a vector of
13397 integer. But the choice to copy the mode of operand 0 was made
13398 long ago and there are no plans to change it. */
13399 scratch = gen_reg_rtx (mode0);
13401 pat = GEN_FCN (icode) (scratch, op0, op1);
13406 /* The vec_any* and vec_all* predicates use the same opcodes for two
13407 different operations, but the bits in CR6 will be different
13408 depending on what information we want. So we have to play tricks
13409 with CR6 to get the right bits out.
13411 If you think this is disgusting, look at the specs for the
13412 AltiVec predicates. */
13414 switch (cr6_form_int)
13417 emit_insn (gen_cr6_test_for_zero (target));
13420 emit_insn (gen_cr6_test_for_zero_reverse (target));
13423 emit_insn (gen_cr6_test_for_lt (target));
13426 emit_insn (gen_cr6_test_for_lt_reverse (target));
13429 error ("argument 1 of %qs is out of range",
13430 "__builtin_altivec_predicate");
/* Build a V16QImode permute selector (in a register) that byte-swaps
   elements of MODE within a 16-byte vector: swap1/2/4/8 reverse bytes
   within 1-, 2-, 4-, and 8-byte element groups respectively.  */
13438 swap_endian_selector_for_mode (machine_mode mode)
13440 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13441 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13442 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13443 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13445 unsigned int *swaparray, i;
/* Unhandled modes are a caller bug.  */
13465 gcc_unreachable ();
13468 for (i = 0; i < 16; ++i)
13469 perm[i] = GEN_INT (swaparray[i]);
13471 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13472 gen_rtvec_v (16, perm)));
/* Expand an AltiVec load builtin (lvx/lvxl/lve*x) call EXP with insn
   ICODE into TARGET.  The two arguments form an offset+base address.
   BLK requests a BLKmode memory reference.  */
13476 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13479 tree arg0 = CALL_EXPR_ARG (exp, 0);
13480 tree arg1 = CALL_EXPR_ARG (exp, 1);
13481 machine_mode tmode = insn_data[icode].operand[0].mode;
13482 machine_mode mode0 = Pmode;
13483 machine_mode mode1 = Pmode;
13484 rtx op0 = expand_normal (arg0);
13485 rtx op1 = expand_normal (arg1);
13487 if (icode == CODE_FOR_nothing)
13488 /* Builtin not supported on this processor. */
13491 /* If we got invalid arguments bail out before generating bad rtl. */
13492 if (arg0 == error_mark_node || arg1 == error_mark_node)
13496 || GET_MODE (target) != tmode
13497 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13498 target = gen_reg_rtx (tmode);
13500 op1 = copy_to_mode_reg (mode1, op1);
13502 /* For LVX, express the RTL accurately by ANDing the address with -16.
13503 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13504 so the raw address is fine. */
13505 if (icode == CODE_FOR_altivec_lvx_v1ti
13506 || icode == CODE_FOR_altivec_lvx_v2df
13507 || icode == CODE_FOR_altivec_lvx_v2di
13508 || icode == CODE_FOR_altivec_lvx_v4sf
13509 || icode == CODE_FOR_altivec_lvx_v4si
13510 || icode == CODE_FOR_altivec_lvx_v8hi
13511 || icode == CODE_FOR_altivec_lvx_v16qi)
/* Zero offset: the base register alone is the raw address.  */
13514 if (op0 == const0_rtx)
13518 op0 = copy_to_mode_reg (mode0, op0);
13519 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
/* LVX ignores the low 4 address bits; model that explicitly.  */
13521 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13522 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13524 emit_insn (gen_rtx_SET (target, addr));
13528 if (op0 == const0_rtx)
13529 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13532 op0 = copy_to_mode_reg (mode0, op0);
13533 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13534 gen_rtx_PLUS (Pmode, op1, op0));
13537 pat = GEN_FCN (icode) (target, addr);
/* Expand a store-vector-with-length (stxvl/stxvll) builtin call EXP with
   insn ICODE: three operands (vector, address, length), no result.  */
13547 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13550 tree arg0 = CALL_EXPR_ARG (exp, 0);
13551 tree arg1 = CALL_EXPR_ARG (exp, 1);
13552 tree arg2 = CALL_EXPR_ARG (exp, 2);
13553 rtx op0 = expand_normal (arg0);
13554 rtx op1 = expand_normal (arg1);
13555 rtx op2 = expand_normal (arg2);
13556 machine_mode mode0 = insn_data[icode].operand[0].mode;
13557 machine_mode mode1 = insn_data[icode].operand[1].mode;
13558 machine_mode mode2 = insn_data[icode].operand[2].mode;
13560 if (icode == CODE_FOR_nothing)
13561 /* Builtin not supported on this processor. */
13564 /* If we got invalid arguments bail out before generating bad rtl. */
13565 if (arg0 == error_mark_node
13566 || arg1 == error_mark_node
13567 || arg2 == error_mark_node)
/* Force each operand into a form its insn predicate accepts.  */
13570 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13571 op0 = copy_to_mode_reg (mode0, op0)
13572 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13573 op1 = copy_to_mode_reg (mode1, op1);
13574 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13575 op2 = copy_to_mode_reg (mode2, op2);
13577 pat = GEN_FCN (icode) (op0, op1, op2);
/* Expand an AltiVec store builtin (stvx/stvxl/stve*x) call EXP with insn
   ICODE: argument 0 is the vector to store, arguments 1 and 2 form the
   offset+base address.  No result value.  */
13585 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13587 tree arg0 = CALL_EXPR_ARG (exp, 0);
13588 tree arg1 = CALL_EXPR_ARG (exp, 1);
13589 tree arg2 = CALL_EXPR_ARG (exp, 2);
13590 rtx op0 = expand_normal (arg0);
13591 rtx op1 = expand_normal (arg1);
13592 rtx op2 = expand_normal (arg2);
13593 rtx pat, addr, rawaddr;
13594 machine_mode tmode = insn_data[icode].operand[0].mode;
13595 machine_mode smode = insn_data[icode].operand[1].mode;
13596 machine_mode mode1 = Pmode;
13597 machine_mode mode2 = Pmode;
13599 /* Invalid arguments. Bail before doing anything stoopid! */
13600 if (arg0 == error_mark_node
13601 || arg1 == error_mark_node
13602 || arg2 == error_mark_node)
13605 op2 = copy_to_mode_reg (mode2, op2);
13607 /* For STVX, express the RTL accurately by ANDing the address with -16.
13608 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13609 so the raw address is fine. */
13610 if (icode == CODE_FOR_altivec_stvx_v2df
13611 || icode == CODE_FOR_altivec_stvx_v2di
13612 || icode == CODE_FOR_altivec_stvx_v4sf
13613 || icode == CODE_FOR_altivec_stvx_v4si
13614 || icode == CODE_FOR_altivec_stvx_v8hi
13615 || icode == CODE_FOR_altivec_stvx_v16qi)
13617 if (op1 == const0_rtx)
13621 op1 = copy_to_mode_reg (mode1, op1);
13622 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
/* STVX ignores the low 4 address bits; model that explicitly.  */
13625 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13626 addr = gen_rtx_MEM (tmode, addr);
13628 op0 = copy_to_mode_reg (tmode, op0);
13630 emit_insn (gen_rtx_SET (addr, op0));
13634 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13635 op0 = copy_to_mode_reg (smode, op0);
13637 if (op1 == const0_rtx)
13638 addr = gen_rtx_MEM (tmode, op2);
13641 op1 = copy_to_mode_reg (mode1, op1);
13642 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13645 pat = GEN_FCN (icode) (addr, op0);
13653 /* Return the appropriate SPR number associated with the given builtin. */
13654 static inline HOST_WIDE_INT
13655 htm_spr_num (enum rs6000_builtins code)
/* Each get/set pair maps to one HTM special-purpose register:
   TFHAR, TFIAR, TEXASR, or (the fallthrough default) TEXASRU.  */
13657 if (code == HTM_BUILTIN_GET_TFHAR
13658 || code == HTM_BUILTIN_SET_TFHAR)
13660 else if (code == HTM_BUILTIN_GET_TFIAR
13661 || code == HTM_BUILTIN_SET_TFIAR)
13663 else if (code == HTM_BUILTIN_GET_TEXASR
13664 || code == HTM_BUILTIN_SET_TEXASR)
13666 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13667 || code == HTM_BUILTIN_SET_TEXASRU);
13668 return TEXASRU_SPR;
13671 /* Return the correct ICODE value depending on whether we are
13672 setting or reading the HTM SPRs. */
13673 static inline enum insn_code
13674 rs6000_htm_spr_icode (bool nonvoid)
/* NONVOID (builtin returns a value) means a read (mfspr); otherwise a
   write (mtspr).  Word size selects the DImode or SImode pattern.  */
13677 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13679 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13682 /* Expand the HTM builtin in EXP and store the result in TARGET.
13683 Store true in *EXPANDEDP if we found a builtin to expand. */
13685 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13687 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13688 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13689 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13690 const struct builtin_description *d;
/* The doubleword tabort variants require 64-bit mode.  */
13695 if (!TARGET_POWERPC64
13696 && (fcode == HTM_BUILTIN_TABORTDC
13697 || fcode == HTM_BUILTIN_TABORTDCI))
13699 size_t uns_fcode = (size_t)fcode;
13700 const char *name = rs6000_builtin_info[uns_fcode].name;
13701 error ("builtin %qs is only valid in 64-bit mode", name);
13705 /* Expand the HTM builtins. */
13707 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13708 if (d->code == fcode)
13710 rtx op[MAX_HTM_OPERANDS], pat;
13713 call_expr_arg_iterator iter;
13714 unsigned attr = rs6000_builtin_info[fcode].attr;
13715 enum insn_code icode = d->icode;
13716 const struct insn_operand_data *insn_op;
13717 bool uses_spr = (attr & RS6000_BTC_SPR);
/* SPR accessors share generic mfspr/mtspr patterns chosen by
   direction rather than the table's icode.  */
13721 icode = rs6000_htm_spr_icode (nonvoid);
13722 insn_op = &insn_data[icode].operand[0];
13726 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13728 || GET_MODE (target) != tmode
13729 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13730 target = gen_reg_rtx (tmode);
13732 op[nopnds++] = target;
/* Collect the call's arguments into op[], forcing each into a form
   its operand predicate accepts; "n"-constrained operands must be
   literal constants and get a diagnostic instead of a copy.  */
13735 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13737 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13740 insn_op = &insn_data[icode].operand[nopnds];
13742 op[nopnds] = expand_normal (arg);
13744 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13746 if (!strcmp (insn_op->constraint, "n"))
13748 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13749 if (!CONST_INT_P (op[nopnds]))
13750 error ("argument %d must be an unsigned literal", arg_num);
13752 error ("argument %d is an unsigned literal that is "
13753 "out of range", arg_num);
13756 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13762 /* Handle the builtins for extended mnemonics. These accept
13763 no arguments, but map to builtins that take arguments. */
13766 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13767 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13768 op[nopnds++] = GEN_INT (1);
13770 attr |= RS6000_BTC_UNARY;
13772 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13773 op[nopnds++] = GEN_INT (0);
13775 attr |= RS6000_BTC_UNARY;
13781 /* If this builtin accesses SPRs, then pass in the appropriate
13782 SPR number and SPR regno as the last two operands. */
13785 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13786 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13788 /* If this builtin accesses a CR, then pass in a scratch
13789 CR as the last operand. */
13790 else if (attr & RS6000_BTC_CR)
13791 { cr = gen_reg_rtx (CCmode);
/* Sanity-check the operand count against the builtin's arity bits
   (plus one for a result, plus the SPR/CR extras).  */
13797 int expected_nopnds = 0;
13798 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13799 expected_nopnds = 1;
13800 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13801 expected_nopnds = 2;
13802 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13803 expected_nopnds = 3;
13804 if (!(attr & RS6000_BTC_VOID))
13805 expected_nopnds += 1;
13807 expected_nopnds += 1;
13809 gcc_assert (nopnds == expected_nopnds
13810 && nopnds <= MAX_HTM_OPERANDS)
/* GEN_FCN takes a fixed argument list, so dispatch on arity.  */
13816 pat = GEN_FCN (icode) (op[0]);
13819 pat = GEN_FCN (icode) (op[0], op[1]);
13822 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13825 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13828 gcc_unreachable ();
13834 if (attr & RS6000_BTC_CR)
13836 if (fcode == HTM_BUILTIN_TBEGIN)
13838 /* Emit code to set TARGET to true or false depending on
13839 whether the tbegin. instruction successfully or failed
13840 to start a transaction. We do this by placing the 1's
13841 complement of CR's EQ bit into TARGET. */
13842 rtx scratch = gen_reg_rtx (SImode);
13843 emit_insn (gen_rtx_SET (scratch,
13844 gen_rtx_EQ (SImode, cr,
13846 emit_insn (gen_rtx_SET (target,
13847 gen_rtx_XOR (SImode, scratch,
13852 /* Emit code to copy the 4-bit condition register field
13853 CR into the least significant end of register TARGET. */
13854 rtx scratch1 = gen_reg_rtx (SImode)
13855 rtx scratch2 = gen_reg_rtx (SImode);
13856 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13857 emit_insn (gen_movcc (subreg, cr));
13858 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13859 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
/* No HTM builtin matched; tell the caller to try other expanders.  */
13868 *expandedp = false;
13872 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
/* NOTE(review): this chunk is an extraction with original line numbers embedded
   and with interior lines dropped (gaps in the 138xx/139xx numbering — braces,
   the return type line, `return target;`, etc. are missing).  Comments below
   describe only what the visible lines show.  */
13875 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
/* __builtin_cpu_init expands to nothing; __builtin_cpu_is and
   __builtin_cpu_supports read fields of the glibc TCB (thread control
   block) via the TLS register when the libc advertises them.  */
13878 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13879 if (fcode == RS6000_BUILTIN_CPU_INIT)
/* Result of cpu_is/cpu_supports is a 0/1 SImode value.  */
13882 if (target == 0 || GET_MODE (target) != SImode)
13883 target = gen_reg_rtx (SImode);
13885 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13886 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13887 /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back
13888 to a STRING_CST. */
13889 if (TREE_CODE (arg) == ARRAY_REF
13890 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
13891 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
13892 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
13893 arg = TREE_OPERAND (arg, 0);
/* Both builtins take a string literal naming a CPU or HWCAP bit.  */
13895 if (TREE_CODE (arg) != STRING_CST)
13897 error ("builtin %qs only accepts a string argument",
13898 rs6000_builtin_info[(size_t) fcode].name);
13902 if (fcode == RS6000_BUILTIN_CPU_IS)
13904 const char *cpu = TREE_STRING_POINTER (arg);
13905 rtx cpuid = NULL_RTX;
/* Linear search of the known-CPU table for the literal name.  */
13906 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13907 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13909 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13910 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13913 if (cpuid == NULL_RTX)
13915 /* Invalid CPU argument. */
13916 error ("cpu %qs is an invalid argument to builtin %qs",
13917 cpu, rs6000_builtin_info[(size_t) fcode].name);
/* Load the platform word from the TCB and compare for equality.  */
13921 rtx platform = gen_reg_rtx (SImode);
13922 rtx tcbmem = gen_const_mem (SImode,
13923 gen_rtx_PLUS (Pmode,
13924 gen_rtx_REG (Pmode, TLS_REGNUM),
13925 GEN_INT (TCB_PLATFORM_OFFSET)));
13926 emit_move_insn (platform, tcbmem);
13927 emit_insn (gen_eqsi3 (target, platform, cpuid));
13929 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13931 const char *hwcap = TREE_STRING_POINTER (arg);
13932 rtx mask = NULL_RTX;
/* Map the HWCAP name to its bit mask and its TCB word offset.
   NOTE(review): hwcap_offset's declaration is in a dropped line.  */
13934 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13935 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13937 mask = GEN_INT (cpu_supports_info[i].mask);
13938 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13941 if (mask == NULL_RTX)
13943 /* Invalid HWCAP argument. */
13944 error ("%s %qs is an invalid argument to builtin %qs",
13945 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
/* TARGET = ((tcb_hwcap & mask) == 0) ^ 1, i.e. 1 iff the bit is set.  */
13949 rtx tcb_hwcap = gen_reg_rtx (SImode);
13950 rtx tcbmem = gen_const_mem (SImode,
13951 gen_rtx_PLUS (Pmode,
13952 gen_rtx_REG (Pmode, TLS_REGNUM),
13953 GEN_INT (hwcap_offset)));
13954 emit_move_insn (tcb_hwcap, tcbmem);
13955 rtx scratch1 = gen_reg_rtx (SImode);
13956 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)))
;
13957 rtx scratch2 = gen_reg_rtx (SImode);
13958 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13959 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13962 gcc_unreachable ();
13964 /* Record that we have expanded a CPU builtin, so that we can later
13965 emit a reference to the special symbol exported by LIBC to ensure we
13966 do not link against an old LIBC that doesn't support this feature. */
13967 cpu_builtin_p = true;
/* Fallback branch for libcs without HWCAP-in-TCB support.  */
13970 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
13971 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
13973 /* For old LIBCs, always return FALSE. */
13974 emit_move_insn (target, GEN_INT (0))
;
13975 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
/* Expand a three-operand builtin: fetch the three call arguments, validate
   any immediate-operand constraints that depend on the insn code, then emit
   GEN_FCN (icode) (target, op0, op1, op2).
   NOTE(review): interior lines were dropped by extraction (the `static rtx`
   return-type line, braces, and several `return` statements are missing);
   comments below describe only the visible lines.  */
13981 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13984 tree arg0 = CALL_EXPR_ARG (exp, 0);
13985 tree arg1 = CALL_EXPR_ARG (exp, 1);
13986 tree arg2 = CALL_EXPR_ARG (exp, 2);
13987 rtx op0 = expand_normal (arg0);
13988 rtx op1 = expand_normal (arg1);
13989 rtx op2 = expand_normal (arg2);
13990 machine_mode tmode = insn_data[icode].operand[0].mode;
13991 machine_mode mode0 = insn_data[icode].operand[1].mode;
13992 machine_mode mode1 = insn_data[icode].operand[2].mode;
13993 machine_mode mode2 = insn_data[icode].operand[3].mode;
13995 if (icode == CODE_FOR_nothing)
13996 /* Builtin not supported on this processor. */
13999 /* If we got invalid arguments bail out before generating bad rtl. */
14000 if (arg0 == error_mark_node
14001 || arg1 == error_mark_node
14002 || arg2 == error_mark_node)
14005 /* Check and prepare argument depending on the instruction code.
14007 Note that a switch statement instead of the sequence of tests
14008 would be incorrect as many of the CODE_FOR values could be
14009 CODE_FOR_nothing and that would yield multiple alternatives
14010 with identical values. We'd never reach here at runtime in
14012 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14013 || icode == CODE_FOR_altivec_vsldoi_v2df
14014 || icode == CODE_FOR_altivec_vsldoi_v4si
14015 || icode == CODE_FOR_altivec_vsldoi_v8hi
14016 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14018 /* Only allow 4-bit unsigned literals. */
14020 if (TREE_CODE (arg2) != INTEGER_CST
14021 || TREE_INT_CST_LOW (arg2) & ~0xf)
14023 error ("argument 3 must be a 4-bit unsigned literal");
14024 return CONST0_RTX (tmode);
14027 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14028 || icode == CODE_FOR_vsx_xxpermdi_v2di
14029 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14030 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14031 || icode == CODE_FOR_vsx_xxpermdi_v1ti
14032 || icode == CODE_FOR_vsx_xxpermdi_v4sf
14033 || icode == CODE_FOR_vsx_xxpermdi_v4si
14034 || icode == CODE_FOR_vsx_xxpermdi_v8hi
14035 || icode == CODE_FOR_vsx_xxpermdi_v16qi
14036 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14037 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14038 || icode == CODE_FOR_vsx_xxsldwi_v4si
14039 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14040 || icode == CODE_FOR_vsx_xxsldwi_v2di
14041 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14043 /* Only allow 2-bit unsigned literals. */
14045 if (TREE_CODE (arg2) != INTEGER_CST
14046 || TREE_INT_CST_LOW (arg2) & ~0x3)
14048 error ("argument 3 must be a 2-bit unsigned literal");
14049 return CONST0_RTX (tmode);
14052 else if (icode == CODE_FOR_vsx_set_v2df
14053 || icode == CODE_FOR_vsx_set_v2di
14054 || icode == CODE_FOR_bcdadd
14055 || icode == CODE_FOR_bcdadd_lt
14056 || icode == CODE_FOR_bcdadd_eq
14057 || icode == CODE_FOR_bcdadd_gt
14058 || icode == CODE_FOR_bcdsub
14059 || icode == CODE_FOR_bcdsub_lt
14060 || icode == CODE_FOR_bcdsub_eq
14061 || icode == CODE_FOR_bcdsub_gt)
14063 /* Only allow 1-bit unsigned literals. */
14065 if (TREE_CODE (arg2) != INTEGER_CST
14066 || TREE_INT_CST_LOW (arg2) & ~0x1)
14068 error ("argument 3 must be a 1-bit unsigned literal")
;
14069 return CONST0_RTX (tmode);
14072 else if (icode == CODE_FOR_dfp_ddedpd_dd
14073 || icode == CODE_FOR_dfp_ddedpd_td)
14075 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
/* NOTE(review): this test mixes arguments — it checks TREE_CODE of arg0
   but masks TREE_INT_CST_LOW of arg2, while the error message refers to
   argument 1.  Looks inconsistent with the denbcd branch below, which
   uses arg0 for both; confirm against upstream GCC before changing.  */
14077 if (TREE_CODE (arg0) != INTEGER_CST
14078 || TREE_INT_CST_LOW (arg2) & ~0x3)
14080 error ("argument 1 must be 0 or 2");
14081 return CONST0_RTX (tmode);
14084 else if (icode == CODE_FOR_dfp_denbcd_dd
14085 || icode == CODE_FOR_dfp_denbcd_td)
14087 /* Only allow 1-bit unsigned literals. */
14089 if (TREE_CODE (arg0) != INTEGER_CST
14090 || TREE_INT_CST_LOW (arg0) & ~0x1)
14092 error ("argument 1 must be a 1-bit unsigned literal");
14093 return CONST0_RTX (tmode);
14096 else if (icode == CODE_FOR_dfp_dscli_dd
14097 || icode == CODE_FOR_dfp_dscli_td
14098 || icode == CODE_FOR_dfp_dscri_dd
14099 || icode == CODE_FOR_dfp_dscri_td)
14101 /* Only allow 6-bit unsigned literals. */
14103 if (TREE_CODE (arg1) != INTEGER_CST
14104 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14106 error ("argument 2 must be a 6-bit unsigned literal");
14107 return CONST0_RTX (tmode);
14110 else if (icode == CODE_FOR_crypto_vshasigmaw
14111 || icode == CODE_FOR_crypto_vshasigmad)
14113 /* Check whether the 2nd and 3rd arguments are integer constants and in
14114 range and prepare arguments. */
14116 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14118 error ("argument 2 must be 0 or 1");
14119 return CONST0_RTX (tmode);
14123 if (TREE_CODE (arg2) != INTEGER_CST
14124 || wi::geu_p (wi::to_wide (arg2), 16))
14126 error ("argument 3 must be in the range [0, 15]");
14127 return CONST0_RTX (tmode);
/* Materialize a fresh target / force operands into registers when the
   insn predicates reject the expanded forms.  */
14132 || GET_MODE (target) != tmode
14133 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14134 target = gen_reg_rtx (tmode);
14136 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14137 op0 = copy_to_mode_reg (mode0, op0);
14138 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14139 op1 = copy_to_mode_reg (mode1, op1);
14140 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14141 op2 = copy_to_mode_reg (mode2, op2);
14143 pat = GEN_FCN (icode) (target, op0, op1, op2);
14152 /* Expand the dst builtins. */
/* Expands the AltiVec data-stream-touch (dst*) builtins by looking FCODE up
   in the bdesc_dst table.  *EXPANDEDP reports whether a match was found.
   NOTE(review): extraction dropped interior lines (function header, braces,
   the `d = bdesc_dst;` initialization, returns); comments reflect only the
   visible lines.  */
14154 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14157 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14158 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14159 tree arg0, arg1, arg2;
14160 machine_mode mode0, mode1;
14161 rtx pat, op0, op1, op2;
14162 const struct builtin_description *d;
14165 *expandedp = false;
14167 /* Handle DST variants. */
14169 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14170 if (d->code == fcode)
14172 arg0 = CALL_EXPR_ARG (exp, 0);
14173 arg1 = CALL_EXPR_ARG (exp, 1);
14174 arg2 = CALL_EXPR_ARG (exp, 2);
14175 op0 = expand_normal (arg0);
14176 op1 = expand_normal (arg1);
14177 op2 = expand_normal (arg2);
14178 mode0 = insn_data[d->icode].operand[0].mode;
14179 mode1 = insn_data[d->icode].operand[1].mode;
14181 /* Invalid arguments, bail out before generating bad rtl. */
14182 if (arg0 == error_mark_node
14183 || arg1 == error_mark_node
14184 || arg2 == error_mark_node)
/* Third argument is the 2-bit data-stream selector; must be a literal.  */
14189 if (TREE_CODE (arg2) != INTEGER_CST
14190 || TREE_INT_CST_LOW (arg2) & ~0x3)
14192 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
/* Address operand is forced into a Pmode register, unlike op1.  */
14196 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14197 op0 = copy_to_mode_reg (Pmode, op0);
14198 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14199 op1 = copy_to_mode_reg (mode1, op1);
14201 pat = GEN_FCN (d->icode) (op0, op1, op2);
14211 /* Expand vec_init builtin. */
/* Builds a vector of mode TYPE_MODE (TYPE) from the call's N_ELT scalar
   arguments.  NOTE(review): extraction dropped the function header line,
   braces, and the trailing `return target;`.  */
14213 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14215 machine_mode tmode = TYPE_MODE (type);
14216 machine_mode inner_mode = GET_MODE_INNER (tmode);
14217 int i, n_elt = GET_MODE_NUNITS (tmode);
14219 gcc_assert (VECTOR_MODE_P (tmode));
14220 gcc_assert (n_elt == call_expr_nargs (exp));
14222 if (!target || !register_operand (target, tmode))
14223 target = gen_reg_rtx (tmode);
14225 /* If we have a vector compromised of a single element, such as V1TImode, do
14226 the initialization directly. */
14227 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14229 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14230 emit_move_insn (target, gen_lowpart (tmode, x));
/* General case: expand each argument and hand the PARALLEL to the
   target-specific vector-init expander.  */
14234 rtvec v = rtvec_alloc (n_elt);
14236 for (i = 0; i < n_elt; ++i)
14238 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14239 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14242 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14248 /* Return the integer constant in ARG. Constrain it to be in the range
14249 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the function header line, braces, and return statements fall
   in dropped lines; only the range check itself is visible here.  */
14252 get_element_number (tree vec_type, tree arg)
14254 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Comma operator assigns ELT as a side effect of the range test.  */
14256 if (!tree_fits_uhwi_p (arg)
14257 || (elt = tree_to_uhwi (arg), elt > max))
14259 error ("selector must be an integer constant in the range [0, %wi]", max);
14266 /* Expand vec_set builtin. */
/* vec_set (vector, scalar, index): insert the scalar at the literal INDEX.
   NOTE(review): header/braces/`return op0;` are in dropped lines.  */
14268 altivec_expand_vec_set_builtin (tree exp)
14270 machine_mode tmode, mode1;
14271 tree arg0, arg1, arg2;
14275 arg0 = CALL_EXPR_ARG (exp, 0);
14276 arg1 = CALL_EXPR_ARG (exp, 1);
14277 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = the vector's mode; mode1 = the element mode.  */
14279 tmode = TYPE_MODE (TREE_TYPE (arg0));
14280 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14281 gcc_assert (VECTOR_MODE_P (tmode));
14283 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14284 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Diagnoses and range-checks the selector (see get_element_number).  */
14285 elt = get_element_number (TREE_TYPE (arg0), arg2);
14287 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14288 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14290 op0 = force_reg (tmode, op0);
14291 op1 = force_reg (mode1, op1);
14293 rs6000_expand_vector_set (op0, op1, elt);
14298 /* Expand vec_ext builtin. */
/* vec_ext (vector, index): extract one element.  A constant index is reduced
   modulo the number of subparts; a variable index is passed through to the
   target extract expander.  NOTE(review): header/braces/`return target;`
   fall in dropped lines.  */
14300 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14302 machine_mode tmode, mode0;
14307 arg0 = CALL_EXPR_ARG (exp, 0);
14308 arg1 = CALL_EXPR_ARG (exp, 1);
14310 op0 = expand_normal (arg0);
14311 op1 = expand_normal (arg1);
14313 if (TREE_CODE (arg1) == INTEGER_CST)
14315 unsigned HOST_WIDE_INT elt;
14316 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
14317 unsigned int truncated_selector;
14318 /* Even if !tree_fits_uhwi_p (arg1)), TREE_INT_CST_LOW (arg0)
14319 returns low-order bits of INTEGER_CST for modulo indexing. */
14320 elt = TREE_INT_CST_LOW (arg1);
14321 truncated_selector = elt % size;
14322 op1 = GEN_INT (truncated_selector);
14325 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14326 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14327 gcc_assert (VECTOR_MODE_P (mode0));
14329 op0 = force_reg (mode0, op0);
/* Under optimization always use a fresh pseudo for the result.  */
14331 if (optimize || !target || !register_operand (target, tmode))
14332 target = gen_reg_rtx (tmode);
14334 rs6000_expand_vector_extract (target, op0, op1);
14339 /* Expand the builtin in EXP and store the result in TARGET. Store
14340 true in *EXPANDEDP if we found a builtin to expand. */
/* Main AltiVec/VSX builtin dispatcher: tries the dst table, then a large
   switch over store/load/special builtins, then the abs and predicate
   tables, then the funky LV* loads.  NOTE(review): extraction dropped many
   interior lines (function header, `switch (fcode)` headers, braces,
   `default:` labels, returns); comments reflect only the visible lines.  */
14342 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14344 const struct builtin_description *d;
14346 enum insn_code icode;
14347 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14348 tree arg0, arg1, arg2;
14350 machine_mode tmode, mode0;
14351 enum rs6000_builtins fcode
14352 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
/* Overloaded builtins must have been resolved earlier; reaching here with
   one is a user error.  */
14354 if (rs6000_overloaded_builtin_p (fcode))
14357 error ("unresolved overload for Altivec builtin %qF", fndecl);
14359 /* Given it is invalid, just generate a normal call. */
14360 return expand_call (exp, target, false);
14363 target = altivec_expand_dst_builtin (exp, target, expandedp);
/* Store builtins: each maps to a specific stv/stxvl insn code.  */
14371 case ALTIVEC_BUILTIN_STVX_V2DF:
14372 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14373 case ALTIVEC_BUILTIN_STVX_V2DI:
14374 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14375 case ALTIVEC_BUILTIN_STVX_V4SF:
14376 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14377 case ALTIVEC_BUILTIN_STVX:
14378 case ALTIVEC_BUILTIN_STVX_V4SI:
14379 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14380 case ALTIVEC_BUILTIN_STVX_V8HI:
14381 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14382 case ALTIVEC_BUILTIN_STVX_V16QI:
14383 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14384 case ALTIVEC_BUILTIN_STVEBX:
14385 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14386 case ALTIVEC_BUILTIN_STVEHX:
14387 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14388 case ALTIVEC_BUILTIN_STVEWX:
14389 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14390 case ALTIVEC_BUILTIN_STVXL_V2DF:
14391 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14392 case ALTIVEC_BUILTIN_STVXL_V2DI:
14393 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14394 case ALTIVEC_BUILTIN_STVXL_V4SF:
14395 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14396 case ALTIVEC_BUILTIN_STVXL:
14397 case ALTIVEC_BUILTIN_STVXL_V4SI:
14398 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14399 case ALTIVEC_BUILTIN_STVXL_V8HI:
14400 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14401 case ALTIVEC_BUILTIN_STVXL_V16QI:
14402 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14404 case ALTIVEC_BUILTIN_STVLX:
14405 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14406 case ALTIVEC_BUILTIN_STVLXL:
14407 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14408 case ALTIVEC_BUILTIN_STVRX:
14409 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14410 case ALTIVEC_BUILTIN_STVRXL:
14411 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14413 case P9V_BUILTIN_STXVL:
14414 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14416 case P9V_BUILTIN_XST_LEN_R:
14417 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14419 case VSX_BUILTIN_STXVD2X_V1TI:
14420 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14421 case VSX_BUILTIN_STXVD2X_V2DF:
14422 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14423 case VSX_BUILTIN_STXVD2X_V2DI:
14424 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14425 case VSX_BUILTIN_STXVW4X_V4SF:
14426 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14427 case VSX_BUILTIN_STXVW4X_V4SI:
14428 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14429 case VSX_BUILTIN_STXVW4X_V8HI:
14430 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14431 case VSX_BUILTIN_STXVW4X_V16QI:
14432 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14434 /* For the following on big endian, it's ok to use any appropriate
14435 unaligned-supporting store, so use a generic expander. For
14436 little-endian, the exact element-reversing instruction must
14438 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14440 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14441 : CODE_FOR_vsx_st_elemrev_v1ti);
14442 return altivec_expand_stv_builtin (code, exp);
14444 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14446 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14447 : CODE_FOR_vsx_st_elemrev_v2df);
14448 return altivec_expand_stv_builtin (code, exp);
14450 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14452 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14453 : CODE_FOR_vsx_st_elemrev_v2di);
14454 return altivec_expand_stv_builtin (code, exp);
14456 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14458 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14459 : CODE_FOR_vsx_st_elemrev_v4sf);
14460 return altivec_expand_stv_builtin (code, exp);
14462 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14464 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14465 : CODE_FOR_vsx_st_elemrev_v4si);
14466 return altivec_expand_stv_builtin (code, exp);
14468 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14470 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14471 : CODE_FOR_vsx_st_elemrev_v8hi);
14472 return altivec_expand_stv_builtin (code, exp);
14474 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14476 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14477 : CODE_FOR_vsx_st_elemrev_v16qi);
14478 return altivec_expand_stv_builtin (code, exp);
/* Special-register and data-stream builtins.  */
14481 case ALTIVEC_BUILTIN_MFVSCR:
14482 icode = CODE_FOR_altivec_mfvscr;
14483 tmode = insn_data[icode].operand[0].mode;
14486 || GET_MODE (target) != tmode
14487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14488 target = gen_reg_rtx (tmode);
14490 pat = GEN_FCN (icode) (target);
14496 case ALTIVEC_BUILTIN_MTVSCR:
14497 icode = CODE_FOR_altivec_mtvscr;
14498 arg0 = CALL_EXPR_ARG (exp, 0);
14499 op0 = expand_normal (arg0);
14500 mode0 = insn_data[icode].operand[0].mode;
14502 /* If we got invalid arguments bail out before generating bad rtl. */
14503 if (arg0 == error_mark_node)
14506 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14507 op0 = copy_to_mode_reg (mode0, op0);
14509 pat = GEN_FCN (icode) (op0);
14514 case ALTIVEC_BUILTIN_DSSALL:
14515 emit_insn (gen_altivec_dssall ());
14518 case ALTIVEC_BUILTIN_DSS:
14519 icode = CODE_FOR_altivec_dss;
14520 arg0 = CALL_EXPR_ARG (exp, 0);
14522 op0 = expand_normal (arg0);
14523 mode0 = insn_data[icode].operand[0].mode;
14525 /* If we got invalid arguments bail out before generating bad rtl. */
14526 if (arg0 == error_mark_node)
/* dss takes a 2-bit stream-id literal, like the dst builtins.  */
14529 if (TREE_CODE (arg0) != INTEGER_CST
14530 || TREE_INT_CST_LOW (arg0) & ~0x3)
14532 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14536 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14537 op0 = copy_to_mode_reg (mode0, op0);
14539 emit_insn (gen_altivec_dss (op0));
/* vec_init / vec_set / vec_ext dispatch to the dedicated expanders.  */
14542 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14543 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14544 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14545 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14546 case VSX_BUILTIN_VEC_INIT_V2DF:
14547 case VSX_BUILTIN_VEC_INIT_V2DI:
14548 case VSX_BUILTIN_VEC_INIT_V1TI:
14549 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14551 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14552 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14553 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14554 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14555 case VSX_BUILTIN_VEC_SET_V2DF:
14556 case VSX_BUILTIN_VEC_SET_V2DI:
14557 case VSX_BUILTIN_VEC_SET_V1TI:
14558 return altivec_expand_vec_set_builtin (exp);
14560 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14561 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14562 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14563 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14564 case VSX_BUILTIN_VEC_EXT_V2DF:
14565 case VSX_BUILTIN_VEC_EXT_V2DI:
14566 case VSX_BUILTIN_VEC_EXT_V1TI:
14567 return altivec_expand_vec_ext_builtin (exp, target);
14569 case P9V_BUILTIN_VEC_EXTRACT4B:
14570 arg1 = CALL_EXPR_ARG (exp, 1);
14573 /* Generate a normal call if it is invalid. */
14574 if (arg1 == error_mark_node)
14575 return expand_call (exp, target, false);
14577 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14579 error ("second argument to %qs must be [0, 12]", "vec_vextract4b");
14580 return expand_call (exp, target, false);
14584 case P9V_BUILTIN_VEC_INSERT4B:
14585 arg2 = CALL_EXPR_ARG (exp, 2);
14588 /* Generate a normal call if it is invalid. */
14589 if (arg2 == error_mark_node)
14590 return expand_call (exp, target, false);
14592 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14594 error ("third argument to %qs must be [0, 12]", "vec_vinsert4b");
14595 return expand_call (exp, target, false);
14601 /* Fall through. */
14604 /* Expand abs* operations. */
14606 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14607 if (d->code == fcode)
14608 return altivec_expand_abs_builtin (d->icode, exp, target);
14610 /* Expand the AltiVec predicates. */
14611 d = bdesc_altivec_preds;
14612 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14613 if (d->code == fcode)
14614 return altivec_expand_predicate_builtin (d->icode, exp, target);
14616 /* LV* are funky. We initialized them differently. */
14619 case ALTIVEC_BUILTIN_LVSL:
14620 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14621 exp, target, false);
14622 case ALTIVEC_BUILTIN_LVSR:
14623 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14624 exp, target, false);
14625 case ALTIVEC_BUILTIN_LVEBX:
14626 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14627 exp, target, false);
14628 case ALTIVEC_BUILTIN_LVEHX:
14629 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14630 exp, target, false);
14631 case ALTIVEC_BUILTIN_LVEWX:
14632 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14633 exp, target, false);
14634 case ALTIVEC_BUILTIN_LVXL_V2DF:
14635 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14636 exp, target, false);
14637 case ALTIVEC_BUILTIN_LVXL_V2DI:
14638 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14639 exp, target, false);
14640 case ALTIVEC_BUILTIN_LVXL_V4SF:
14641 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14642 exp, target, false);
14643 case ALTIVEC_BUILTIN_LVXL:
14644 case ALTIVEC_BUILTIN_LVXL_V4SI:
14645 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14646 exp, target, false);
14647 case ALTIVEC_BUILTIN_LVXL_V8HI:
14648 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14649 exp, target, false);
14650 case ALTIVEC_BUILTIN_LVXL_V16QI:
14651 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14652 exp, target, false);
14653 case ALTIVEC_BUILTIN_LVX_V1TI:
14654 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14655 exp, target, false);
14656 case ALTIVEC_BUILTIN_LVX_V2DF:
14657 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14658 exp, target, false);
14659 case ALTIVEC_BUILTIN_LVX_V2DI:
14660 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14661 exp, target, false);
14662 case ALTIVEC_BUILTIN_LVX_V4SF:
14663 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14664 exp, target, false);
14665 case ALTIVEC_BUILTIN_LVX:
14666 case ALTIVEC_BUILTIN_LVX_V4SI:
14667 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14668 exp, target, false);
14669 case ALTIVEC_BUILTIN_LVX_V8HI:
14670 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14671 exp, target, false);
14672 case ALTIVEC_BUILTIN_LVX_V16QI:
14673 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14674 exp, target, false);
/* lvlx/lvrx family passes blk=true to the lv expander.  */
14675 case ALTIVEC_BUILTIN_LVLX:
14676 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14677 exp, target, true);
14678 case ALTIVEC_BUILTIN_LVLXL:
14679 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14680 exp, target, true);
14681 case ALTIVEC_BUILTIN_LVRX:
14682 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14683 exp, target, true);
14684 case ALTIVEC_BUILTIN_LVRXL:
14685 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14686 exp, target, true);
14687 case VSX_BUILTIN_LXVD2X_V1TI:
14688 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14689 exp, target, false);
14690 case VSX_BUILTIN_LXVD2X_V2DF:
14691 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14692 exp, target, false);
14693 case VSX_BUILTIN_LXVD2X_V2DI:
14694 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14695 exp, target, false);
14696 case VSX_BUILTIN_LXVW4X_V4SF:
14697 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14698 exp, target, false);
14699 case VSX_BUILTIN_LXVW4X_V4SI:
14700 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14701 exp, target, false);
14702 case VSX_BUILTIN_LXVW4X_V8HI:
14703 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14704 exp, target, false);
14705 case VSX_BUILTIN_LXVW4X_V16QI:
14706 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14707 exp, target, false);
14708 /* For the following on big endian, it's ok to use any appropriate
14709 unaligned-supporting load, so use a generic expander. For
14710 little-endian, the exact element-reversing instruction must
14712 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14714 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14715 : CODE_FOR_vsx_ld_elemrev_v2df);
14716 return altivec_expand_lv_builtin (code, exp, target, false);
14718 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14720 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14721 : CODE_FOR_vsx_ld_elemrev_v1ti);
14722 return altivec_expand_lv_builtin (code, exp, target, false);
14724 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14726 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14727 : CODE_FOR_vsx_ld_elemrev_v2di);
14728 return altivec_expand_lv_builtin (code, exp, target, false);
14730 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14732 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14733 : CODE_FOR_vsx_ld_elemrev_v4sf);
14734 return altivec_expand_lv_builtin (code, exp, target, false);
14736 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14738 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14739 : CODE_FOR_vsx_ld_elemrev_v4si);
14740 return altivec_expand_lv_builtin (code, exp, target, false);
14742 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14744 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14745 : CODE_FOR_vsx_ld_elemrev_v8hi);
14746 return altivec_expand_lv_builtin (code, exp, target, false);
14748 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14750 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14751 : CODE_FOR_vsx_ld_elemrev_v16qi);
14752 return altivec_expand_lv_builtin (code, exp, target, false);
14757 /* Fall through. */
/* No match anywhere: report not-expanded to the caller.  */
14760 *expandedp = false;
14764 /* Check whether a builtin function is supported in this target
/* True iff every ISA bit the builtin requires is enabled in the current
   rs6000_builtin_mask.  NOTE(review): return statements fall in dropped
   lines of this extraction.  */
14767 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
14769 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
14770 if ((fnmask & rs6000_builtin_mask) != fnmask)
14776 /* Raise an error message for a builtin function that is called without the
14777 appropriate target options being set. */
/* Decodes the builtin's requirement mask into the most specific diagnostic:
   each branch names the command-line option(s) that would enable it.
   Branch order matters — combined-mask tests precede single-bit tests.  */
14780 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14782 size_t uns_fncode = (size_t) fncode;
14783 const char *name = rs6000_builtin_info[uns_fncode].name;
14784 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14786 gcc_assert (name != NULL);
14787 if ((fnmask & RS6000_BTM_CELL) != 0)
14788 error ("%qs is only valid for the cell processor", name);
14789 else if ((fnmask & RS6000_BTM_VSX) != 0)
14790 error ("%qs requires the %qs option", name, "-mvsx");
14791 else if ((fnmask & RS6000_BTM_HTM) != 0)
14792 error ("%qs requires the %qs option", name, "-mhtm");
14793 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14794 error ("%qs requires the %qs option", name, "-maltivec");
14795 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14796 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14797 error ("%qs requires the %qs and %qs options", name, "-mhard-dfp",
14798 "-mpower8-vector");
14799 else if ((fnmask & RS6000_BTM_DFP) != 0)
14800 error ("%qs requires the %qs option", name, "-mhard-dfp");
14801 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14802 error ("%qs requires the %qs option", name, "-mpower8-vector");
14803 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14804 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14805 error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
14807 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14808 error ("%qs requires the %qs option", name, "-mcpu=power9");
14809 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14810 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14811 error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
14813 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
14814 error ("%qs requires the %qs option", name, "-mcpu=power9");
14815 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
14817 if (!TARGET_HARD_FLOAT)
14818 error ("%qs requires the %qs option", name, "-mhard-float");
14820 error ("%qs requires the %qs option", name,
14821 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
14823 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14824 error ("%qs requires the %qs option", name, "-mhard-float");
14825 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
14826 error ("%qs requires ISA 3.0 IEEE 128-bit floating point", name);
14827 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
14828 error ("%qs requires the %qs option", name, "%<-mfloat128%>");
14829 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14830 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14831 error ("%qs requires the %qs (or newer), and %qs or %qs options",
14832 name, "-mcpu=power7", "-m64", "-mpowerpc64");
/* Generic fallback when no specific requirement bit matched.  */
14834 error ("%qs is not supported with the current options", name);
14837 /* Target hook for early folding of built-ins, shamelessly stolen
/* Delegates builtin folding to the subtarget when SUBTARGET_FOLD_BUILTIN is
   defined; all four parameters are ATTRIBUTE_UNUSED because they are only
   consumed inside that conditional block.  NOTE(review): the fallback path
   (when the macro is undefined, presumably returning NULL_TREE) lies in
   lines missing from this extract.  */
14841 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
14842 int n_args ATTRIBUTE_UNUSED,
14843 tree *args ATTRIBUTE_UNUSED,
14844 bool ignore ATTRIBUTE_UNUSED)
14846 #ifdef SUBTARGET_FOLD_BUILTIN
14847 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
14853 /* Helper function to sort out which built-ins may be valid without having
/* Every case listed is a vector *store* builtin (STVX / STXVW4X / STXVD2X
   flavors); stores produce no value, so a call without an LHS is still
   meaningful and safe to gimple-fold.  NOTE(review): the switch body's
   return statements and default case are not visible in this extract.  */
14856 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
14860 case ALTIVEC_BUILTIN_STVX_V16QI:
14861 case ALTIVEC_BUILTIN_STVX_V8HI:
14862 case ALTIVEC_BUILTIN_STVX_V4SI:
14863 case ALTIVEC_BUILTIN_STVX_V4SF:
14864 case ALTIVEC_BUILTIN_STVX_V2DI:
14865 case ALTIVEC_BUILTIN_STVX_V2DF:
14866 case VSX_BUILTIN_STXVW4X_V16QI:
14867 case VSX_BUILTIN_STXVW4X_V8HI:
14868 case VSX_BUILTIN_STXVW4X_V4SF:
14869 case VSX_BUILTIN_STXVW4X_V4SI:
14870 case VSX_BUILTIN_STXVD2X_V2DF:
14871 case VSX_BUILTIN_STXVD2X_V2DI:
14878 /* Helper function to handle the gimple folding of a vector compare
14879 operation. This sets up true/false vectors, and uses the
14880 VEC_COND_EXPR operation.
14881 CODE indicates which comparison is to be made. (EQ, GT, ...).
14882 TYPE indicates the type of the result. */
14884 fold_build_vec_cmp (tree_code code, tree type,
14885 tree arg0, tree arg1)
/* The comparison itself is built in a boolean vector type of the same
   element count/size as TYPE; the VEC_COND_EXPR then selects all-ones
   (true) or all-zeros (false) per lane, matching the AltiVec/VSX compare
   instructions' lane-mask result convention.  */
14887 tree cmp_type = build_same_sized_truth_vector_type (type);
14888 tree zero_vec = build_zero_cst (type);
14889 tree minus_one_vec = build_minus_one_cst (type);
14890 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
14891 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
14894 /* Helper function to handle the in-between steps for the
14895 vector compare built-ins. */
14897 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
/* Pulls the two vector operands and the LHS off the builtin call, builds
   the lane-mask compare via fold_build_vec_cmp, and replaces the call
   statement in place with a plain assignment (preserving its source
   location).  Requires the call to have an LHS.  */
14899 tree arg0 = gimple_call_arg (stmt, 0);
14900 tree arg1 = gimple_call_arg (stmt, 1);
14901 tree lhs = gimple_call_lhs (stmt);
14902 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
14903 gimple *g = gimple_build_assign (lhs, cmp);
14904 gimple_set_location (g, gimple_location (stmt));
14905 gsi_replace (gsi, g, true);
14908 /* Helper function to map V2DF and V4SF types to their
14909 integral equivalents (V2DI and V4SI). */
14910 tree map_to_integral_tree_type (tree input_tree_type)
/* Integral vector types pass through unchanged; float element types are
   mapped by element-type compatibility.  Any other input is a caller bug
   (gcc_unreachable).  */
14912 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
14913 return input_tree_type;
14916 if (types_compatible_p (TREE_TYPE (input_tree_type),
14917 TREE_TYPE (V2DF_type_node)))
14918 return V2DI_type_node;
14919 else if (types_compatible_p (TREE_TYPE (input_tree_type),
14920 TREE_TYPE (V4SF_type_node)))
14921 return V4SI_type_node;
14923 gcc_unreachable ();
14927 /* Helper function to handle the vector merge[hl] built-ins. The
14928 implementation difference between h and l versions for this code are in
14929 the values used when building of the permute vector for high word versus
14930 low word merge. The variance is keyed off the use_high parameter. */
14932 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
14934 arg0 = gimple_call_arg (stmt, 0);
14935 tree arg1 = gimple_call_arg (stmt, 1);
14936 tree lhs = gimple_call_lhs (stmt);
14937 tree lhs_type = TREE_TYPE (lhs);
14938 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14939 int midpoint = n_elts / 2;
/* NOTE(review): the declaration of the 'offset' used below (presumably
   derived from use_high, selecting the high or low half) sits in lines
   missing from this extract -- confirm against full source.  */
14945 /* The permute_type will match the lhs for integral types. For double and
14946 float types, the permute type needs to map to the V2 or V4 type that
14949 permute_type = map_to_integral_tree_type (lhs_type);
14950 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
/* Build the permute selector: interleave lanes from arg0 (index i) and
   arg1 (index n_elts + i), for the first 'midpoint' positions.  */
14952 for (int i = 0; i < midpoint; i++)
14954 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14956 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14957 offset + n_elts + i));
14960 tree permute = elts.build ();
/* Replace the builtin call with a VEC_PERM_EXPR using the constant mask.  */
14962 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14963 gimple_set_location (g, gimple_location (stmt));
14964 gsi_replace (gsi, g, true);
14967 /* Helper function to handle the vector merge[eo] built-ins. */
/* Folds vec_merge-even/odd into a VEC_PERM_EXPR.  USE_ODD is 0 for the
   even variant and 1 for odd: selector lanes are 2*i + use_odd from arg0
   interleaved with 2*i + use_odd + n_elts from arg1.  */
14969 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
14971 tree arg0 = gimple_call_arg (stmt, 0);
14972 tree arg1 = gimple_call_arg (stmt, 1);
14973 tree lhs = gimple_call_lhs (stmt);
14974 tree lhs_type = TREE_TYPE (lhs);
14975 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14977 /* The permute_type will match the lhs for integral types. For double and
14978 float types, the permute type needs to map to the V2 or V4 type that
14981 permute_type = map_to_integral_tree_type (lhs_type);
14983 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14985 /* Build the permute vector. */
14986 for (int i = 0; i < n_elts / 2; i++)
14988 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14990 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14991 2*i + use_odd + n_elts));
14994 tree permute = elts.build ();
/* Replace the builtin call statement with the equivalent permute.  */
14996 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14997 gimple_set_location (g, gimple_location (stmt));
14998 gsi_replace (gsi, g, true);
15001 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15002 a constant, use rs6000_fold_builtin.) */
15005 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15007 gimple *stmt = gsi_stmt (*gsi);
15008 tree fndecl = gimple_call_fndecl (stmt);
15009 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15010 enum rs6000_builtins fn_code
15011 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15012 tree arg0, arg1, lhs, temp;
15013 enum tree_code bcode;
15016 size_t uns_fncode = (size_t) fn_code;
15017 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15018 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15019 const char *fn_name2 = (icode != CODE_FOR_nothing)
15020 ? get_insn_name ((int) icode)
15023 if (TARGET_DEBUG_BUILTIN)
15024 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15025 fn_code, fn_name1, fn_name2);
15027 if (!rs6000_fold_gimple)
15030 /* Prevent gimple folding for code that does not have a LHS, unless it is
15031 allowed per the rs6000_builtin_valid_without_lhs helper function. */
15032 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15035 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15036 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15037 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15043 /* Flavors of vec_add. We deliberately don't expand
15044 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15045 TImode, resulting in much poorer code generation. */
15046 case ALTIVEC_BUILTIN_VADDUBM:
15047 case ALTIVEC_BUILTIN_VADDUHM:
15048 case ALTIVEC_BUILTIN_VADDUWM:
15049 case P8V_BUILTIN_VADDUDM:
15050 case ALTIVEC_BUILTIN_VADDFP:
15051 case VSX_BUILTIN_XVADDDP:
15054 arg0 = gimple_call_arg (stmt, 0);
15055 arg1 = gimple_call_arg (stmt, 1);
15056 lhs = gimple_call_lhs (stmt);
15057 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15058 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15060 /* Ensure the binary operation is performed in a type
15061 that wraps if it is integral type. */
15062 gimple_seq stmts = NULL;
15063 tree type = unsigned_type_for (TREE_TYPE (lhs));
15064 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15066 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15068 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15069 type, uarg0, uarg1);
15070 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15071 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15072 build1 (VIEW_CONVERT_EXPR,
15073 TREE_TYPE (lhs), res));
15074 gsi_replace (gsi, g, true);
15077 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15078 gimple_set_location (g, gimple_location (stmt));
15079 gsi_replace (gsi, g, true);
15081 /* Flavors of vec_sub. We deliberately don't expand
15082 P8V_BUILTIN_VSUBUQM. */
15083 case ALTIVEC_BUILTIN_VSUBUBM:
15084 case ALTIVEC_BUILTIN_VSUBUHM:
15085 case ALTIVEC_BUILTIN_VSUBUWM:
15086 case P8V_BUILTIN_VSUBUDM:
15087 case ALTIVEC_BUILTIN_VSUBFP:
15088 case VSX_BUILTIN_XVSUBDP:
15089 bcode = MINUS_EXPR;
15091 case VSX_BUILTIN_XVMULSP:
15092 case VSX_BUILTIN_XVMULDP:
15093 arg0 = gimple_call_arg (stmt, 0);
15094 arg1 = gimple_call_arg (stmt, 1);
15095 lhs = gimple_call_lhs (stmt);
15096 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15097 gimple_set_location (g, gimple_location (stmt));
15098 gsi_replace (gsi, g, true);
15100 /* Even element flavors of vec_mul (signed). */
15101 case ALTIVEC_BUILTIN_VMULESB:
15102 case ALTIVEC_BUILTIN_VMULESH:
15103 case P8V_BUILTIN_VMULESW:
15104 /* Even element flavors of vec_mul (unsigned). */
15105 case ALTIVEC_BUILTIN_VMULEUB:
15106 case ALTIVEC_BUILTIN_VMULEUH:
15107 case P8V_BUILTIN_VMULEUW:
15108 arg0 = gimple_call_arg (stmt, 0);
15109 arg1 = gimple_call_arg (stmt, 1);
15110 lhs = gimple_call_lhs (stmt);
15111 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15112 gimple_set_location (g, gimple_location (stmt));
15113 gsi_replace (gsi, g, true);
15115 /* Odd element flavors of vec_mul (signed). */
15116 case ALTIVEC_BUILTIN_VMULOSB:
15117 case ALTIVEC_BUILTIN_VMULOSH:
15118 case P8V_BUILTIN_VMULOSW:
15119 /* Odd element flavors of vec_mul (unsigned). */
15120 case ALTIVEC_BUILTIN_VMULOUB:
15121 case ALTIVEC_BUILTIN_VMULOUH:
15122 case P8V_BUILTIN_VMULOUW:
15123 arg0 = gimple_call_arg (stmt, 0);
15124 arg1 = gimple_call_arg (stmt, 1);
15125 lhs = gimple_call_lhs (stmt);
15126 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15127 gimple_set_location (g, gimple_location (stmt));
15128 gsi_replace (gsi, g, true);
15130 /* Flavors of vec_div (Integer). */
15131 case VSX_BUILTIN_DIV_V2DI:
15132 case VSX_BUILTIN_UDIV_V2DI:
15133 arg0 = gimple_call_arg (stmt, 0);
15134 arg1 = gimple_call_arg (stmt, 1);
15135 lhs = gimple_call_lhs (stmt);
15136 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15137 gimple_set_location (g, gimple_location (stmt));
15138 gsi_replace (gsi, g, true);
15140 /* Flavors of vec_div (Float). */
15141 case VSX_BUILTIN_XVDIVSP:
15142 case VSX_BUILTIN_XVDIVDP:
15143 arg0 = gimple_call_arg (stmt, 0);
15144 arg1 = gimple_call_arg (stmt, 1);
15145 lhs = gimple_call_lhs (stmt);
15146 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15147 gimple_set_location (g, gimple_location (stmt));
15148 gsi_replace (gsi, g, true);
15150 /* Flavors of vec_and. */
15151 case ALTIVEC_BUILTIN_VAND:
15152 arg0 = gimple_call_arg (stmt, 0);
15153 arg1 = gimple_call_arg (stmt, 1);
15154 lhs = gimple_call_lhs (stmt);
15155 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15156 gimple_set_location (g, gimple_location (stmt));
15157 gsi_replace (gsi, g, true);
15159 /* Flavors of vec_andc. */
15160 case ALTIVEC_BUILTIN_VANDC:
15161 arg0 = gimple_call_arg (stmt, 0);
15162 arg1 = gimple_call_arg (stmt, 1);
15163 lhs = gimple_call_lhs (stmt);
15164 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15165 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15166 gimple_set_location (g, gimple_location (stmt));
15167 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15168 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15169 gimple_set_location (g, gimple_location (stmt));
15170 gsi_replace (gsi, g, true);
15172 /* Flavors of vec_nand. */
15173 case P8V_BUILTIN_VEC_NAND:
15174 case P8V_BUILTIN_NAND_V16QI:
15175 case P8V_BUILTIN_NAND_V8HI:
15176 case P8V_BUILTIN_NAND_V4SI:
15177 case P8V_BUILTIN_NAND_V4SF:
15178 case P8V_BUILTIN_NAND_V2DF:
15179 case P8V_BUILTIN_NAND_V2DI:
15180 arg0 = gimple_call_arg (stmt, 0);
15181 arg1 = gimple_call_arg (stmt, 1);
15182 lhs = gimple_call_lhs (stmt);
15183 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15184 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15185 gimple_set_location (g, gimple_location (stmt));
15186 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15187 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15188 gimple_set_location (g, gimple_location (stmt));
15189 gsi_replace (gsi, g, true);
15191 /* Flavors of vec_or. */
15192 case ALTIVEC_BUILTIN_VOR:
15193 arg0 = gimple_call_arg (stmt, 0);
15194 arg1 = gimple_call_arg (stmt, 1);
15195 lhs = gimple_call_lhs (stmt);
15196 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15197 gimple_set_location (g, gimple_location (stmt));
15198 gsi_replace (gsi, g, true);
15200 /* flavors of vec_orc. */
15201 case P8V_BUILTIN_ORC_V16QI:
15202 case P8V_BUILTIN_ORC_V8HI:
15203 case P8V_BUILTIN_ORC_V4SI:
15204 case P8V_BUILTIN_ORC_V4SF:
15205 case P8V_BUILTIN_ORC_V2DF:
15206 case P8V_BUILTIN_ORC_V2DI:
15207 arg0 = gimple_call_arg (stmt, 0);
15208 arg1 = gimple_call_arg (stmt, 1);
15209 lhs = gimple_call_lhs (stmt);
15210 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15211 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15212 gimple_set_location (g, gimple_location (stmt));
15213 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15214 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15215 gimple_set_location (g, gimple_location (stmt));
15216 gsi_replace (gsi, g, true);
15218 /* Flavors of vec_xor. */
15219 case ALTIVEC_BUILTIN_VXOR:
15220 arg0 = gimple_call_arg (stmt, 0);
15221 arg1 = gimple_call_arg (stmt, 1);
15222 lhs = gimple_call_lhs (stmt);
15223 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15224 gimple_set_location (g, gimple_location (stmt));
15225 gsi_replace (gsi, g, true);
15227 /* Flavors of vec_nor. */
15228 case ALTIVEC_BUILTIN_VNOR:
15229 arg0 = gimple_call_arg (stmt, 0);
15230 arg1 = gimple_call_arg (stmt, 1);
15231 lhs = gimple_call_lhs (stmt);
15232 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15233 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15234 gimple_set_location (g, gimple_location (stmt));
15235 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15236 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15237 gimple_set_location (g, gimple_location (stmt));
15238 gsi_replace (gsi, g, true);
15240 /* flavors of vec_abs. */
15241 case ALTIVEC_BUILTIN_ABS_V16QI:
15242 case ALTIVEC_BUILTIN_ABS_V8HI:
15243 case ALTIVEC_BUILTIN_ABS_V4SI:
15244 case ALTIVEC_BUILTIN_ABS_V4SF:
15245 case P8V_BUILTIN_ABS_V2DI:
15246 case VSX_BUILTIN_XVABSDP:
15247 arg0 = gimple_call_arg (stmt, 0);
15248 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15249 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15251 lhs = gimple_call_lhs (stmt);
15252 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15253 gimple_set_location (g, gimple_location (stmt));
15254 gsi_replace (gsi, g, true);
15256 /* flavors of vec_min. */
15257 case VSX_BUILTIN_XVMINDP:
15258 case P8V_BUILTIN_VMINSD:
15259 case P8V_BUILTIN_VMINUD:
15260 case ALTIVEC_BUILTIN_VMINSB:
15261 case ALTIVEC_BUILTIN_VMINSH:
15262 case ALTIVEC_BUILTIN_VMINSW:
15263 case ALTIVEC_BUILTIN_VMINUB:
15264 case ALTIVEC_BUILTIN_VMINUH:
15265 case ALTIVEC_BUILTIN_VMINUW:
15266 case ALTIVEC_BUILTIN_VMINFP:
15267 arg0 = gimple_call_arg (stmt, 0);
15268 arg1 = gimple_call_arg (stmt, 1);
15269 lhs = gimple_call_lhs (stmt);
15270 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15271 gimple_set_location (g, gimple_location (stmt));
15272 gsi_replace (gsi, g, true);
15274 /* flavors of vec_max. */
15275 case VSX_BUILTIN_XVMAXDP:
15276 case P8V_BUILTIN_VMAXSD:
15277 case P8V_BUILTIN_VMAXUD:
15278 case ALTIVEC_BUILTIN_VMAXSB:
15279 case ALTIVEC_BUILTIN_VMAXSH:
15280 case ALTIVEC_BUILTIN_VMAXSW:
15281 case ALTIVEC_BUILTIN_VMAXUB:
15282 case ALTIVEC_BUILTIN_VMAXUH:
15283 case ALTIVEC_BUILTIN_VMAXUW:
15284 case ALTIVEC_BUILTIN_VMAXFP:
15285 arg0 = gimple_call_arg (stmt, 0);
15286 arg1 = gimple_call_arg (stmt, 1);
15287 lhs = gimple_call_lhs (stmt);
15288 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15289 gimple_set_location (g, gimple_location (stmt));
15290 gsi_replace (gsi, g, true);
15292 /* Flavors of vec_eqv. */
15293 case P8V_BUILTIN_EQV_V16QI:
15294 case P8V_BUILTIN_EQV_V8HI:
15295 case P8V_BUILTIN_EQV_V4SI:
15296 case P8V_BUILTIN_EQV_V4SF:
15297 case P8V_BUILTIN_EQV_V2DF:
15298 case P8V_BUILTIN_EQV_V2DI:
15299 arg0 = gimple_call_arg (stmt, 0);
15300 arg1 = gimple_call_arg (stmt, 1);
15301 lhs = gimple_call_lhs (stmt);
15302 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15303 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15304 gimple_set_location (g, gimple_location (stmt));
15305 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15306 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15307 gimple_set_location (g, gimple_location (stmt));
15308 gsi_replace (gsi, g, true);
15310 /* Flavors of vec_rotate_left. */
15311 case ALTIVEC_BUILTIN_VRLB:
15312 case ALTIVEC_BUILTIN_VRLH:
15313 case ALTIVEC_BUILTIN_VRLW:
15314 case P8V_BUILTIN_VRLD:
15315 arg0 = gimple_call_arg (stmt, 0);
15316 arg1 = gimple_call_arg (stmt, 1);
15317 lhs = gimple_call_lhs (stmt);
15318 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15319 gimple_set_location (g, gimple_location (stmt));
15320 gsi_replace (gsi, g, true);
15322 /* Flavors of vector shift right algebraic.
15323 vec_sra{b,h,w} -> vsra{b,h,w}. */
15324 case ALTIVEC_BUILTIN_VSRAB:
15325 case ALTIVEC_BUILTIN_VSRAH:
15326 case ALTIVEC_BUILTIN_VSRAW:
15327 case P8V_BUILTIN_VSRAD:
15329 arg0 = gimple_call_arg (stmt, 0);
15330 arg1 = gimple_call_arg (stmt, 1);
15331 lhs = gimple_call_lhs (stmt);
15332 tree arg1_type = TREE_TYPE (arg1);
15333 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15334 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15335 location_t loc = gimple_location (stmt);
15336 /* Force arg1 into the range valid matching the arg0 type. */
15337 /* Build a vector consisting of the max valid bit-size values. */
15338 int n_elts = VECTOR_CST_NELTS (arg1);
15339 tree element_size = build_int_cst (unsigned_element_type,
15341 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15342 for (int i = 0; i < n_elts; i++)
15343 elts.safe_push (element_size);
15344 tree modulo_tree = elts.build ();
15345 /* Modulo the provided shift value against that vector. */
15346 gimple_seq stmts = NULL;
15347 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15348 unsigned_arg1_type, arg1);
15349 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15350 unsigned_arg1_type, unsigned_arg1,
15352 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15353 /* And finally, do the shift. */
15354 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
15355 gimple_set_location (g, loc);
15356 gsi_replace (gsi, g, true);
15359 /* Flavors of vector shift left.
15360 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15361 case ALTIVEC_BUILTIN_VSLB:
15362 case ALTIVEC_BUILTIN_VSLH:
15363 case ALTIVEC_BUILTIN_VSLW:
15364 case P8V_BUILTIN_VSLD:
15367 gimple_seq stmts = NULL;
15368 arg0 = gimple_call_arg (stmt, 0);
15369 tree arg0_type = TREE_TYPE (arg0);
15370 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15371 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15373 arg1 = gimple_call_arg (stmt, 1);
15374 tree arg1_type = TREE_TYPE (arg1);
15375 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15376 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15377 loc = gimple_location (stmt);
15378 lhs = gimple_call_lhs (stmt);
15379 /* Force arg1 into the range valid matching the arg0 type. */
15380 /* Build a vector consisting of the max valid bit-size values. */
15381 int n_elts = VECTOR_CST_NELTS (arg1);
15382 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15384 tree element_size = build_int_cst (unsigned_element_type,
15385 tree_size_in_bits / n_elts);
15386 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15387 for (int i = 0; i < n_elts; i++)
15388 elts.safe_push (element_size);
15389 tree modulo_tree = elts.build ();
15390 /* Modulo the provided shift value against that vector. */
15391 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15392 unsigned_arg1_type, arg1);
15393 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15394 unsigned_arg1_type, unsigned_arg1,
15396 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15397 /* And finally, do the shift. */
15398 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15399 gimple_set_location (g, gimple_location (stmt));
15400 gsi_replace (gsi, g, true);
15403 /* Flavors of vector shift right. */
15404 case ALTIVEC_BUILTIN_VSRB:
15405 case ALTIVEC_BUILTIN_VSRH:
15406 case ALTIVEC_BUILTIN_VSRW:
15407 case P8V_BUILTIN_VSRD:
15409 arg0 = gimple_call_arg (stmt, 0);
15410 arg1 = gimple_call_arg (stmt, 1);
15411 lhs = gimple_call_lhs (stmt);
15412 tree arg1_type = TREE_TYPE (arg1);
15413 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15414 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15415 location_t loc = gimple_location (stmt);
15416 gimple_seq stmts = NULL;
15417 /* Convert arg0 to unsigned. */
15419 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15420 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15421 /* Force arg1 into the range valid matching the arg0 type. */
15422 /* Build a vector consisting of the max valid bit-size values. */
15423 int n_elts = VECTOR_CST_NELTS (arg1);
15424 tree element_size = build_int_cst (unsigned_element_type,
15426 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15427 for (int i = 0; i < n_elts; i++)
15428 elts.safe_push (element_size);
15429 tree modulo_tree = elts.build ();
15430 /* Modulo the provided shift value against that vector. */
15431 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15432 unsigned_arg1_type, arg1);
15433 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15434 unsigned_arg1_type, unsigned_arg1,
15436 /* Do the shift. */
15438 = gimple_build (&stmts, RSHIFT_EXPR,
15439 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
15440 /* Convert result back to the lhs type. */
15441 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15442 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15443 update_call_from_tree (gsi, res);
15446 /* Vector loads. */
15447 case ALTIVEC_BUILTIN_LVX_V16QI:
15448 case ALTIVEC_BUILTIN_LVX_V8HI:
15449 case ALTIVEC_BUILTIN_LVX_V4SI:
15450 case ALTIVEC_BUILTIN_LVX_V4SF:
15451 case ALTIVEC_BUILTIN_LVX_V2DI:
15452 case ALTIVEC_BUILTIN_LVX_V2DF:
15453 case ALTIVEC_BUILTIN_LVX_V1TI:
15455 arg0 = gimple_call_arg (stmt, 0); // offset
15456 arg1 = gimple_call_arg (stmt, 1); // address
15457 lhs = gimple_call_lhs (stmt);
15458 location_t loc = gimple_location (stmt);
15459 /* Since arg1 may be cast to a different type, just use ptr_type_node
15460 here instead of trying to enforce TBAA on pointer types. */
15461 tree arg1_type = ptr_type_node;
15462 tree lhs_type = TREE_TYPE (lhs);
15463 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15464 the tree using the value from arg0. The resulting type will match
15465 the type of arg1. */
15466 gimple_seq stmts = NULL;
15467 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15468 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15469 arg1_type, arg1, temp_offset);
15470 /* Mask off any lower bits from the address. */
15471 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15472 arg1_type, temp_addr,
15473 build_int_cst (arg1_type, -16));
15474 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15475 if (!is_gimple_mem_ref_addr (aligned_addr))
15477 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15478 gimple *g = gimple_build_assign (t, aligned_addr);
15479 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15482 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15483 take an offset, but since we've already incorporated the offset
15484 above, here we just pass in a zero. */
15486 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15487 build_int_cst (arg1_type, 0)));
15488 gimple_set_location (g, loc);
15489 gsi_replace (gsi, g, true);
15492 /* Vector stores. */
15493 case ALTIVEC_BUILTIN_STVX_V16QI:
15494 case ALTIVEC_BUILTIN_STVX_V8HI:
15495 case ALTIVEC_BUILTIN_STVX_V4SI:
15496 case ALTIVEC_BUILTIN_STVX_V4SF:
15497 case ALTIVEC_BUILTIN_STVX_V2DI:
15498 case ALTIVEC_BUILTIN_STVX_V2DF:
15500 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15501 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15502 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15503 location_t loc = gimple_location (stmt);
15504 tree arg0_type = TREE_TYPE (arg0);
15505 /* Use ptr_type_node (no TBAA) for the arg2_type.
15506 FIXME: (Richard) "A proper fix would be to transition this type as
15507 seen from the frontend to GIMPLE, for example in a similar way we
15508 do for MEM_REFs by piggy-backing that on an extra argument, a
15509 constant zero pointer of the alias pointer type to use (which would
15510 also serve as a type indicator of the store itself). I'd use a
15511 target specific internal function for this (not sure if we can have
15512 those target specific, but I guess if it's folded away then that's
15513 fine) and get away with the overload set." */
15514 tree arg2_type = ptr_type_node;
15515 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15516 the tree using the value from arg0. The resulting type will match
15517 the type of arg2. */
15518 gimple_seq stmts = NULL;
15519 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15520 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15521 arg2_type, arg2, temp_offset);
15522 /* Mask off any lower bits from the address. */
15523 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15524 arg2_type, temp_addr,
15525 build_int_cst (arg2_type, -16));
15526 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15527 if (!is_gimple_mem_ref_addr (aligned_addr))
15529 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15530 gimple *g = gimple_build_assign (t, aligned_addr);
15531 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15534 /* The desired gimple result should be similar to:
15535 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15537 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15538 build_int_cst (arg2_type, 0)), arg0);
15539 gimple_set_location (g, loc);
15540 gsi_replace (gsi, g, true);
15544 /* unaligned Vector loads. */
15545 case VSX_BUILTIN_LXVW4X_V16QI:
15546 case VSX_BUILTIN_LXVW4X_V8HI:
15547 case VSX_BUILTIN_LXVW4X_V4SF:
15548 case VSX_BUILTIN_LXVW4X_V4SI:
15549 case VSX_BUILTIN_LXVD2X_V2DF:
15550 case VSX_BUILTIN_LXVD2X_V2DI:
15552 arg0 = gimple_call_arg (stmt, 0); // offset
15553 arg1 = gimple_call_arg (stmt, 1); // address
15554 lhs = gimple_call_lhs (stmt);
15555 location_t loc = gimple_location (stmt);
15556 /* Since arg1 may be cast to a different type, just use ptr_type_node
15557 here instead of trying to enforce TBAA on pointer types. */
15558 tree arg1_type = ptr_type_node;
15559 tree lhs_type = TREE_TYPE (lhs);
15560 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15561 required alignment (power) is 4 bytes regardless of data type. */
15562 tree align_ltype = build_aligned_type (lhs_type, 4);
15563 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15564 the tree using the value from arg0. The resulting type will match
15565 the type of arg1. */
15566 gimple_seq stmts = NULL;
15567 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15568 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15569 arg1_type, arg1, temp_offset);
15570 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15571 if (!is_gimple_mem_ref_addr (temp_addr))
15573 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15574 gimple *g = gimple_build_assign (t, temp_addr);
15575 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15578 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15579 take an offset, but since we've already incorporated the offset
15580 above, here we just pass in a zero. */
15582 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15583 build_int_cst (arg1_type, 0)));
15584 gimple_set_location (g, loc);
15585 gsi_replace (gsi, g, true);
15589 /* unaligned Vector stores. */
15590 case VSX_BUILTIN_STXVW4X_V16QI:
15591 case VSX_BUILTIN_STXVW4X_V8HI:
15592 case VSX_BUILTIN_STXVW4X_V4SF:
15593 case VSX_BUILTIN_STXVW4X_V4SI:
15594 case VSX_BUILTIN_STXVD2X_V2DF:
15595 case VSX_BUILTIN_STXVD2X_V2DI:
15597 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15598 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15599 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15600 location_t loc = gimple_location (stmt);
15601 tree arg0_type = TREE_TYPE (arg0);
15602 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15603 tree arg2_type = ptr_type_node;
15604 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15605 required alignment (power) is 4 bytes regardless of data type. */
15606 tree align_stype = build_aligned_type (arg0_type, 4);
15607 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15608 the tree using the value from arg1. */
15609 gimple_seq stmts = NULL;
15610 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15611 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15612 arg2_type, arg2, temp_offset);
15613 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15614 if (!is_gimple_mem_ref_addr (temp_addr))
15616 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15617 gimple *g = gimple_build_assign (t, temp_addr);
15618 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15622 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15623 build_int_cst (arg2_type, 0)), arg0);
15624 gimple_set_location (g, loc);
15625 gsi_replace (gsi, g, true);
15629 /* Vector Fused multiply-add (fma). */
15630 case ALTIVEC_BUILTIN_VMADDFP:
15631 case VSX_BUILTIN_XVMADDDP:
15632 case ALTIVEC_BUILTIN_VMLADDUHM:
15634 arg0 = gimple_call_arg (stmt, 0);
15635 arg1 = gimple_call_arg (stmt, 1);
15636 tree arg2 = gimple_call_arg (stmt, 2);
15637 lhs = gimple_call_lhs (stmt);
15638 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15639 gimple_call_set_lhs (g, lhs);
15640 gimple_call_set_nothrow (g, true);
15641 gimple_set_location (g, gimple_location (stmt));
15642 gsi_replace (gsi, g, true);
15646 /* Vector compares; EQ, NE, GE, GT, LE. */
15647 case ALTIVEC_BUILTIN_VCMPEQUB:
15648 case ALTIVEC_BUILTIN_VCMPEQUH:
15649 case ALTIVEC_BUILTIN_VCMPEQUW:
15650 case P8V_BUILTIN_VCMPEQUD:
15651 fold_compare_helper (gsi, EQ_EXPR, stmt);
15654 case P9V_BUILTIN_CMPNEB:
15655 case P9V_BUILTIN_CMPNEH:
15656 case P9V_BUILTIN_CMPNEW:
15657 fold_compare_helper (gsi, NE_EXPR, stmt);
15660 case VSX_BUILTIN_CMPGE_16QI:
15661 case VSX_BUILTIN_CMPGE_U16QI:
15662 case VSX_BUILTIN_CMPGE_8HI:
15663 case VSX_BUILTIN_CMPGE_U8HI:
15664 case VSX_BUILTIN_CMPGE_4SI:
15665 case VSX_BUILTIN_CMPGE_U4SI:
15666 case VSX_BUILTIN_CMPGE_2DI:
15667 case VSX_BUILTIN_CMPGE_U2DI:
15668 fold_compare_helper (gsi, GE_EXPR, stmt);
15671 case ALTIVEC_BUILTIN_VCMPGTSB:
15672 case ALTIVEC_BUILTIN_VCMPGTUB:
15673 case ALTIVEC_BUILTIN_VCMPGTSH:
15674 case ALTIVEC_BUILTIN_VCMPGTUH:
15675 case ALTIVEC_BUILTIN_VCMPGTSW:
15676 case ALTIVEC_BUILTIN_VCMPGTUW:
15677 case P8V_BUILTIN_VCMPGTUD:
15678 case P8V_BUILTIN_VCMPGTSD:
15679 fold_compare_helper (gsi, GT_EXPR, stmt);
15682 case VSX_BUILTIN_CMPLE_16QI:
15683 case VSX_BUILTIN_CMPLE_U16QI:
15684 case VSX_BUILTIN_CMPLE_8HI:
15685 case VSX_BUILTIN_CMPLE_U8HI:
15686 case VSX_BUILTIN_CMPLE_4SI:
15687 case VSX_BUILTIN_CMPLE_U4SI:
15688 case VSX_BUILTIN_CMPLE_2DI:
15689 case VSX_BUILTIN_CMPLE_U2DI:
15690 fold_compare_helper (gsi, LE_EXPR, stmt);
15693 /* flavors of vec_splat_[us]{8,16,32}. */
15694 case ALTIVEC_BUILTIN_VSPLTISB:
15695 case ALTIVEC_BUILTIN_VSPLTISH:
15696 case ALTIVEC_BUILTIN_VSPLTISW:
15698 arg0 = gimple_call_arg (stmt, 0);
15699 lhs = gimple_call_lhs (stmt);
15701 /* Only fold the vec_splat_*() if the lower bits of arg 0 is a
15702 5-bit signed constant in range -16 to +15. */
15703 if (TREE_CODE (arg0) != INTEGER_CST
15704 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
15706 gimple_seq stmts = NULL;
15707 location_t loc = gimple_location (stmt);
15708 tree splat_value = gimple_convert (&stmts, loc,
15709 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15710 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15711 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15712 g = gimple_build_assign (lhs, splat_tree);
15713 gimple_set_location (g, gimple_location (stmt));
15714 gsi_replace (gsi, g, true);
15718 /* Flavors of vec_splat. */
15719 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
15720 case ALTIVEC_BUILTIN_VSPLTB:
15721 case ALTIVEC_BUILTIN_VSPLTH:
15722 case ALTIVEC_BUILTIN_VSPLTW:
15723 case VSX_BUILTIN_XXSPLTD_V2DI:
15724 case VSX_BUILTIN_XXSPLTD_V2DF:
15726 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
15727 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
15728 /* Only fold the vec_splat_*() if arg1 is both a constant value and
15729 is a valid index into the arg0 vector. */
15730 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
15731 if (TREE_CODE (arg1) != INTEGER_CST
15732 || TREE_INT_CST_LOW (arg1) > (n_elts -1))
15734 lhs = gimple_call_lhs (stmt);
15735 tree lhs_type = TREE_TYPE (lhs);
15736 tree arg0_type = TREE_TYPE (arg0);
15738 if (TREE_CODE (arg0) == VECTOR_CST)
15739 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
15742 /* Determine (in bits) the length and start location of the
15743 splat value for a call to the tree_vec_extract helper. */
15744 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
15745 * BITS_PER_UNIT / n_elts;
15746 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
15747 tree len = build_int_cst (bitsizetype, splat_elem_size);
15748 tree start = build_int_cst (bitsizetype, splat_start_bit);
15749 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
15752 /* And finally, build the new vector. */
15753 tree splat_tree = build_vector_from_val (lhs_type, splat);
15754 g = gimple_build_assign (lhs, splat_tree);
15755 gimple_set_location (g, gimple_location (stmt));
15756 gsi_replace (gsi, g, true);
15760 /* vec_mergel (integrals). */
15761 case ALTIVEC_BUILTIN_VMRGLH:
15762 case ALTIVEC_BUILTIN_VMRGLW:
15763 case VSX_BUILTIN_XXMRGLW_4SI:
15764 case ALTIVEC_BUILTIN_VMRGLB:
15765 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15766 case VSX_BUILTIN_XXMRGLW_4SF:
15767 case VSX_BUILTIN_VEC_MERGEL_V2DF:
15768 fold_mergehl_helper (gsi, stmt, 1);
15770 /* vec_mergeh (integrals). */
15771 case ALTIVEC_BUILTIN_VMRGHH:
15772 case ALTIVEC_BUILTIN_VMRGHW:
15773 case VSX_BUILTIN_XXMRGHW_4SI:
15774 case ALTIVEC_BUILTIN_VMRGHB:
15775 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15776 case VSX_BUILTIN_XXMRGHW_4SF:
15777 case VSX_BUILTIN_VEC_MERGEH_V2DF:
15778 fold_mergehl_helper (gsi, stmt, 0);
15781 /* Flavors of vec_mergee. */
15782 case P8V_BUILTIN_VMRGEW_V4SI:
15783 case P8V_BUILTIN_VMRGEW_V2DI:
15784 case P8V_BUILTIN_VMRGEW_V4SF:
15785 case P8V_BUILTIN_VMRGEW_V2DF:
15786 fold_mergeeo_helper (gsi, stmt, 0);
15788 /* Flavors of vec_mergeo. */
15789 case P8V_BUILTIN_VMRGOW_V4SI:
15790 case P8V_BUILTIN_VMRGOW_V2DI:
15791 case P8V_BUILTIN_VMRGOW_V4SF:
15792 case P8V_BUILTIN_VMRGOW_V2DF:
15793 fold_mergeeo_helper (gsi, stmt, 1);
15796 /* d = vec_pack (a, b) */
15797 case P8V_BUILTIN_VPKUDUM:
15798 case ALTIVEC_BUILTIN_VPKUHUM:
15799 case ALTIVEC_BUILTIN_VPKUWUM:
15801 arg0 = gimple_call_arg (stmt, 0);
15802 arg1 = gimple_call_arg (stmt, 1);
15803 lhs = gimple_call_lhs (stmt);
15804 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
15805 gimple_set_location (g, gimple_location (stmt));
15806 gsi_replace (gsi, g, true);
15810 /* d = vec_unpackh (a) */
15811 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
15812 in this code is sensitive to endian-ness, and needs to be inverted to
15813 handle both LE and BE targets. */
15814 case ALTIVEC_BUILTIN_VUPKHSB:
15815 case ALTIVEC_BUILTIN_VUPKHSH:
15816 case P8V_BUILTIN_VUPKHSW:
15818 arg0 = gimple_call_arg (stmt, 0);
15819 lhs = gimple_call_lhs (stmt);
15820 if (BYTES_BIG_ENDIAN)
15821 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15823 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15824 gimple_set_location (g, gimple_location (stmt));
15825 gsi_replace (gsi, g, true);
15828 /* d = vec_unpackl (a) */
15829 case ALTIVEC_BUILTIN_VUPKLSB:
15830 case ALTIVEC_BUILTIN_VUPKLSH:
15831 case P8V_BUILTIN_VUPKLSW:
15833 arg0 = gimple_call_arg (stmt, 0);
15834 lhs = gimple_call_lhs (stmt);
15835 if (BYTES_BIG_ENDIAN)
15836 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15838 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15839 gimple_set_location (g, gimple_location (stmt));
15840 gsi_replace (gsi, g, true);
15843 /* There is no gimple type corresponding with pixel, so just return. */
15844 case ALTIVEC_BUILTIN_VUPKHPX:
15845 case ALTIVEC_BUILTIN_VUPKLPX:
15849 case ALTIVEC_BUILTIN_VPERM_16QI:
15850 case ALTIVEC_BUILTIN_VPERM_8HI:
15851 case ALTIVEC_BUILTIN_VPERM_4SI:
15852 case ALTIVEC_BUILTIN_VPERM_2DI:
15853 case ALTIVEC_BUILTIN_VPERM_4SF:
15854 case ALTIVEC_BUILTIN_VPERM_2DF:
15856 arg0 = gimple_call_arg (stmt, 0);
15857 arg1 = gimple_call_arg (stmt, 1);
15858 tree permute = gimple_call_arg (stmt, 2);
15859 lhs = gimple_call_lhs (stmt);
15860 location_t loc = gimple_location (stmt);
15861 gimple_seq stmts = NULL;
15862 // convert arg0 and arg1 to match the type of the permute
15863 // for the VEC_PERM_EXPR operation.
15864 tree permute_type = (TREE_TYPE (permute));
15865 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
15866 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
15867 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
15868 permute_type, arg0_ptype, arg1_ptype,
15870 // Convert the result back to the desired lhs type upon completion.
15871 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
15872 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15873 g = gimple_build_assign (lhs, temp);
15874 gimple_set_location (g, loc);
15875 gsi_replace (gsi, g, true);
15880 if (TARGET_DEBUG_BUILTIN)
15881 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
15882 fn_code, fn_name1, fn_name2);
15889 /* Expand an expression EXP that calls a built-in function,
15890 with result going to TARGET if that's convenient
15891 (and in mode MODE if that's convenient).
15892 SUBTARGET may be used as the target for computing one of EXP's operands.
15893 IGNORE is nonzero if the value is to be ignored. */
15896 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15897 machine_mode mode ATTRIBUTE_UNUSED,
15898 int ignore ATTRIBUTE_UNUSED)
/* NOTE(review): this listing elides several original lines (braces, a
   switch header, some argument lists).  Only comments are added here; all
   code tokens are left byte-identical.  */
15900 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15901 enum rs6000_builtins fcode
15902 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15903 size_t uns_fcode = (size_t)fcode;
15904 const struct builtin_description *d;
/* Look up the ISA-option mask this builtin requires, whether the current
   target options satisfy it, and the insn code used to expand it.  */
15908 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15909 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15910 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15912 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
15913 floating point type, depending on whether long double is the IBM extended
15914 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
15915 we only define one variant of the built-in function, and switch the code
15916 when defining it, rather than defining two built-ins and using the
15917 overload table in rs6000-c.c to switch between the two. If we don't have
15918 the proper assembler, don't do this switch because CODE_FOR_*kf* and
15919 CODE_FOR_*tf* will be CODE_FOR_nothing. */
15920 if (FLOAT128_IEEE_P (TFmode))
/* Remap each KFmode insn code to its TFmode twin when long double is
   IEEE 128-bit.  (The switch header is elided in this listing.)  */
15926 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
15927 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
15928 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
15929 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
15930 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
15931 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
15932 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
15933 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
15934 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
15935 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
15936 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
15937 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
15938 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
/* Optional debug trace: dump the builtin's name, insn, and operand-count
   class to stderr.  */
15941 if (TARGET_DEBUG_BUILTIN)
15943 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15944 const char *name2 = (icode != CODE_FOR_nothing)
15945 ? get_insn_name ((int) icode)
15949 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15951 default: name3 = "unknown"; break;
15952 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15953 case RS6000_BTC_UNARY: name3 = "unary"; break;
15954 case RS6000_BTC_BINARY: name3 = "binary"; break;
15955 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15956 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15957 case RS6000_BTC_ABS: name3 = "abs"; break;
15958 case RS6000_BTC_DST: name3 = "dst"; break;
15963 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15964 (name1) ? name1 : "---", fcode,
15965 (name2) ? name2 : "---", (int) icode,
15967 func_valid_p ? "" : ", not valid");
/* NOTE(review): the guard line is elided here — presumably reached only
   when !func_valid_p; the invalid builtin is diagnosed and then expanded
   as an ordinary call.  */
15972 rs6000_invalid_builtin (fcode);
15974 /* Given it is invalid, just generate a normal call. */
15975 return expand_call (exp, target, ignore);
/* Builtins with bespoke expansion paths (the enclosing switch on FCODE is
   elided in this listing).  */
15980 case RS6000_BUILTIN_RECIP:
15981 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15983 case RS6000_BUILTIN_RECIPF:
15984 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15986 case RS6000_BUILTIN_RSQRTF:
15987 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15989 case RS6000_BUILTIN_RSQRT:
15990 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15992 case POWER7_BUILTIN_BPERMD:
15993 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15994 ? CODE_FOR_bpermd_di
15995 : CODE_FOR_bpermd_si), exp, target);
15997 case RS6000_BUILTIN_GET_TB:
15998 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16001 case RS6000_BUILTIN_MFTB:
16002 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16003 ? CODE_FOR_rs6000_mftb_di
16004 : CODE_FOR_rs6000_mftb_si),
16007 case RS6000_BUILTIN_MFFS:
16008 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16010 case RS6000_BUILTIN_MTFSB0:
16011 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
16013 case RS6000_BUILTIN_MTFSB1:
16014 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
16016 case RS6000_BUILTIN_SET_FPSCR_RN:
16017 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
16020 case RS6000_BUILTIN_SET_FPSCR_DRN:
16022 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
16025 case RS6000_BUILTIN_MFFSL:
16026 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
16028 case RS6000_BUILTIN_MTFSF:
16029 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16031 case RS6000_BUILTIN_CPU_INIT:
16032 case RS6000_BUILTIN_CPU_IS:
16033 case RS6000_BUILTIN_CPU_SUPPORTS:
16034 return cpu_expand_builtin (fcode, exp, target);
16036 case MISC_BUILTIN_SPEC_BARRIER:
16038 emit_insn (gen_speculation_barrier ());
/* vec_lvsl/vec_lvsr mask generation for unaligned load/store support.  */
16042 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16043 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16045 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16046 : (int) CODE_FOR_altivec_lvsl_direct);
16047 machine_mode tmode = insn_data[icode2].operand[0].mode;
16048 machine_mode mode = insn_data[icode2].operand[1].mode;
16052 gcc_assert (TARGET_ALTIVEC);
16054 arg = CALL_EXPR_ARG (exp, 0);
16055 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16056 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16057 addr = memory_address (mode, op);
16058 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16062 /* For the load case need to negate the address. */
16063 op = gen_reg_rtx (GET_MODE (addr));
16064 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16066 op = gen_rtx_MEM (mode, op);
/* Ensure TARGET is a fresh register when it does not satisfy the insn's
   output predicate (the first leg of this condition is elided here).  */
16069 || GET_MODE (target) != tmode
16070 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16071 target = gen_reg_rtx (tmode);
16073 pat = GEN_FCN (icode2) (target, op);
16081 case ALTIVEC_BUILTIN_VCFUX:
16082 case ALTIVEC_BUILTIN_VCFSX:
16083 case ALTIVEC_BUILTIN_VCTUXS:
16084 case ALTIVEC_BUILTIN_VCTSXS:
16085 /* FIXME: There's got to be a nicer way to handle this case than
16086 constructing a new CALL_EXPR. */
16087 if (call_expr_nargs (exp) == 1)
/* Supply the implicit zero scale argument for the one-argument form.  */
16089 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16090 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16094 /* For the pack and unpack int128 routines, fix up the builtin so it
16095 uses the correct IBM128 type. */
16096 case MISC_BUILTIN_PACK_IF:
16097 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16099 icode = CODE_FOR_packtf;
16100 fcode = MISC_BUILTIN_PACK_TF;
16101 uns_fcode = (size_t)fcode;
16105 case MISC_BUILTIN_UNPACK_IF:
16106 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16108 icode = CODE_FOR_unpacktf;
16109 fcode = MISC_BUILTIN_UNPACK_TF;
16110 uns_fcode = (size_t)fcode;
/* Not handled above: try the AltiVec then HTM expanders, and finally
   fall through to the generic operand-count tables below.  */
16118 if (TARGET_ALTIVEC)
16120 ret = altivec_expand_builtin (exp, target, &success);
16127 ret = htm_expand_builtin (exp, target, &success);
16133 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16134 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16135 gcc_assert (attr == RS6000_BTC_UNARY
16136 || attr == RS6000_BTC_BINARY
16137 || attr == RS6000_BTC_TERNARY
16138 || attr == RS6000_BTC_SPECIAL);
16140 /* Handle simple unary operations. */
16142 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16143 if (d->code == fcode)
16144 return rs6000_expand_unop_builtin (icode, exp, target);
16146 /* Handle simple binary operations. */
16148 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16149 if (d->code == fcode)
16150 return rs6000_expand_binop_builtin (icode, exp, target);
16152 /* Handle simple ternary operations. */
16154 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16155 if (d->code == fcode)
16156 return rs6000_expand_ternop_builtin (icode, exp, target);
16158 /* Handle simple no-argument operations. */
16160 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16161 if (d->code == fcode)
16162 return rs6000_expand_zeroop_builtin (icode, target);
/* No descriptor table claimed FCODE; the assertion above should make this
   unreachable.  */
16164 gcc_unreachable ();
16167 /* Create a builtin vector type with a name. Taking care not to give
16168 the canonical type a name. */
16171 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16173 tree result = build_vector_type (elt_type, num_elts);
16175 /* Copy so we don't give the canonical type a name. */
16176 result = build_variant_type_copy (result);
/* Register NAME for the variant copy only; the canonical vector type
   stays anonymous.  NOTE(review): the trailing "return result;" line
   appears to be elided from this listing.  */
16178 add_builtin_type (name, result);
16184 rs6000_init_builtins (void)
/* NOTE(review): this listing elides a number of original lines (braces,
   some argument lists).  Comments only are added; code is byte-identical.  */
16190 if (TARGET_DEBUG_BUILTIN)
16191 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16192 (TARGET_ALTIVEC) ? ", altivec" : "",
16193 (TARGET_VSX) ? ", vsx" : "");
/* Register the user-visible AltiVec/VSX vector types.  "long" spells the
   64-bit element name on 64-bit targets, "long long" on 32-bit ones.  */
16195 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16196 : "__vector long long",
16197 intDI_type_node, 2);
16198 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16199 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16200 intSI_type_node, 4);
16201 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16202 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16203 intHI_type_node, 8);
16204 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16205 intQI_type_node, 16);
16207 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16208 unsigned_intQI_type_node, 16);
16209 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16210 unsigned_intHI_type_node, 8);
16211 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16212 unsigned_intSI_type_node, 4);
16213 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16214 ? "__vector unsigned long"
16215 : "__vector unsigned long long",
16216 unsigned_intDI_type_node, 2);
16218 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16220 const_str_type_node
16221 = build_pointer_type (build_qualified_type (char_type_node,
16224 /* We use V1TI mode as a special container to hold __int128_t items that
16225 must live in VSX registers. */
16226 if (intTI_type_node)
16228 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16229 intTI_type_node, 1);
16230 unsigned_V1TI_type_node
16231 = rs6000_vector_type ("__vector unsigned __int128",
16232 unsigned_intTI_type_node, 1);
16235 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16236 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16237 'vector unsigned short'. */
16239 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16240 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16241 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16242 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16243 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
/* Cache the language-independent scalar type nodes under target-local
   aliases used by the builtin machinery.  */
16245 long_integer_type_internal_node = long_integer_type_node;
16246 long_unsigned_type_internal_node = long_unsigned_type_node;
16247 long_long_integer_type_internal_node = long_long_integer_type_node;
16248 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16249 intQI_type_internal_node = intQI_type_node;
16250 uintQI_type_internal_node = unsigned_intQI_type_node;
16251 intHI_type_internal_node = intHI_type_node;
16252 uintHI_type_internal_node = unsigned_intHI_type_node;
16253 intSI_type_internal_node = intSI_type_node;
16254 uintSI_type_internal_node = unsigned_intSI_type_node;
16255 intDI_type_internal_node = intDI_type_node;
16256 uintDI_type_internal_node = unsigned_intDI_type_node;
16257 intTI_type_internal_node = intTI_type_node;
16258 uintTI_type_internal_node = unsigned_intTI_type_node;
16259 float_type_internal_node = float_type_node;
16260 double_type_internal_node = double_type_node;
16261 long_double_type_internal_node = long_double_type_node;
16262 dfloat64_type_internal_node = dfloat64_type_node;
16263 dfloat128_type_internal_node = dfloat128_type_node;
16264 void_type_internal_node = void_type_node;
16266 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16267 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16268 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16269 format that uses a pair of doubles, depending on the switches and
16272 If we don't support for either 128-bit IBM double double or IEEE 128-bit
16273 floating point, we need make sure the type is non-zero or else self-test
16274 fails during bootstrap.
16276 Always create __ibm128 as a separate type, even if the current long double
16277 format is IBM extended double.
16279 For IEEE 128-bit floating point, always create the type __ieee128. If the
16280 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16282 if (TARGET_FLOAT128_TYPE)
16284 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16285 ibm128_float_type_node = long_double_type_node;
/* Otherwise build a fresh 128-bit REAL_TYPE in IFmode for __ibm128.  */
16288 ibm128_float_type_node = make_node (REAL_TYPE);
16289 TYPE_PRECISION (ibm128_float_type_node) = 128;
16290 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16291 layout_type (ibm128_float_type_node);
16294 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16297 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16298 ieee128_float_type_node = long_double_type_node;
16300 ieee128_float_type_node = float128_type_node;
16302 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
/* Without 128-bit float support, alias both nodes to long double so the
   type nodes are never null (see the comment above re self-tests).  */
16307 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16309 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16311 builtin_mode_to_type[QImode][0] = integer_type_node;
16312 builtin_mode_to_type[HImode][0] = integer_type_node;
16313 builtin_mode_to_type[SImode][0] = intSI_type_node;
16314 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16315 builtin_mode_to_type[DImode][0] = intDI_type_node;
16316 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16317 builtin_mode_to_type[TImode][0] = intTI_type_node;
16318 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16319 builtin_mode_to_type[SFmode][0] = float_type_node;
16320 builtin_mode_to_type[DFmode][0] = double_type_node;
16321 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16322 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16323 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16324 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16325 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16326 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16327 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16328 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16329 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16330 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16331 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16332 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16333 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16334 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16335 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16336 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16337 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
/* Give the __bool and __pixel element types debugger-visible names.  */
16339 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16340 TYPE_NAME (bool_char_type_node) = tdecl;
16342 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16343 TYPE_NAME (bool_short_type_node) = tdecl;
16345 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16346 TYPE_NAME (bool_int_type_node) = tdecl;
16348 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16349 TYPE_NAME (pixel_type_node) = tdecl;
16351 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16352 bool_char_type_node, 16);
16353 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16354 bool_short_type_node, 8);
16355 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16356 bool_int_type_node, 4);
16357 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16358 ? "__vector __bool long"
16359 : "__vector __bool long long",
16360 bool_long_long_type_node, 2);
16361 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16362 pixel_type_node, 8);
16364 /* Create Altivec and VSX builtins on machines with at least the
16365 general purpose extensions (970 and newer) to allow the use of
16366 the target attribute. */
16367 if (TARGET_EXTRA_BUILTINS)
16368 altivec_init_builtins ();
16370 htm_init_builtins ();
16372 if (TARGET_EXTRA_BUILTINS)
16373 rs6000_common_init_builtins ();
/* Define the scalar builtins (reciprocal estimate, rsqrt, bpermd,
   timebase, FPSCR access, CPU feature tests) available regardless of
   AltiVec/VSX.  */
16375 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16376 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16377 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16379 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16380 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16381 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16383 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16384 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16385 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16387 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16388 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16389 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16391 mode = (TARGET_64BIT) ? DImode : SImode;
16392 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16393 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16394 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16396 ftype = build_function_type_list (unsigned_intDI_type_node,
16398 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
/* __builtin_ppc_mftb returns 64 bits on 64-bit targets, 32 otherwise
   (the selecting conditional lines are elided in this listing).  */
16401 ftype = build_function_type_list (unsigned_intDI_type_node,
16404 ftype = build_function_type_list (unsigned_intSI_type_node,
16406 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16408 ftype = build_function_type_list (double_type_node, NULL_TREE);
16409 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16411 ftype = build_function_type_list (double_type_node, NULL_TREE);
16412 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16414 ftype = build_function_type_list (void_type_node,
16417 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16419 ftype = build_function_type_list (void_type_node,
16422 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16424 ftype = build_function_type_list (void_type_node,
16427 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16429 ftype = build_function_type_list (void_type_node,
16432 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16434 ftype = build_function_type_list (void_type_node,
16435 intSI_type_node, double_type_node,
16437 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16439 ftype = build_function_type_list (void_type_node, NULL_TREE);
16440 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16441 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16442 MISC_BUILTIN_SPEC_BARRIER);
16444 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16446 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16447 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16449 /* AIX libm provides clog as __clog. */
16450 if (TARGET_XCOFF &&
16451 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16452 set_user_assembler_name (tdecl, "__clog");
/* Allow subtargets (e.g. Darwin) to add their own builtins.  */
16454 #ifdef SUBTARGET_INIT_BUILTINS
16455 SUBTARGET_INIT_BUILTINS;
16459 /* Returns the rs6000 builtin decl for CODE. */
16462 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16464 HOST_WIDE_INT fnmask;
/* Codes beyond the builtin table are unknown.  */
16466 if (code >= RS6000_BUILTIN_COUNT)
16467 return error_mark_node;
/* The builtin exists but is disabled by the current target options:
   diagnose it and fail, rather than returning a decl the caller cannot
   legally expand.  */
16469 fnmask = rs6000_builtin_info[code].mask;
16470 if ((fnmask & rs6000_builtin_mask) != fnmask)
16472 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16473 return error_mark_node;
16476 return rs6000_builtin_decls[code];
16480 altivec_init_builtins (void)
16482 const struct builtin_description *d;
16486 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16488 tree pvoid_type_node = build_pointer_type (void_type_node);
16490 tree pcvoid_type_node
16491 = build_pointer_type (build_qualified_type (void_type_node,
16494 tree int_ftype_opaque
16495 = build_function_type_list (integer_type_node,
16496 opaque_V4SI_type_node, NULL_TREE);
16497 tree opaque_ftype_opaque
16498 = build_function_type_list (integer_type_node, NULL_TREE);
16499 tree opaque_ftype_opaque_int
16500 = build_function_type_list (opaque_V4SI_type_node,
16501 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16502 tree opaque_ftype_opaque_opaque_int
16503 = build_function_type_list (opaque_V4SI_type_node,
16504 opaque_V4SI_type_node, opaque_V4SI_type_node,
16505 integer_type_node, NULL_TREE);
16506 tree opaque_ftype_opaque_opaque_opaque
16507 = build_function_type_list (opaque_V4SI_type_node,
16508 opaque_V4SI_type_node, opaque_V4SI_type_node,
16509 opaque_V4SI_type_node, NULL_TREE);
16510 tree opaque_ftype_opaque_opaque
16511 = build_function_type_list (opaque_V4SI_type_node,
16512 opaque_V4SI_type_node, opaque_V4SI_type_node,
16514 tree int_ftype_int_opaque_opaque
16515 = build_function_type_list (integer_type_node,
16516 integer_type_node, opaque_V4SI_type_node,
16517 opaque_V4SI_type_node, NULL_TREE);
16518 tree int_ftype_int_v4si_v4si
16519 = build_function_type_list (integer_type_node,
16520 integer_type_node, V4SI_type_node,
16521 V4SI_type_node, NULL_TREE);
16522 tree int_ftype_int_v2di_v2di
16523 = build_function_type_list (integer_type_node,
16524 integer_type_node, V2DI_type_node,
16525 V2DI_type_node, NULL_TREE);
16526 tree void_ftype_v4si
16527 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16528 tree v8hi_ftype_void
16529 = build_function_type_list (V8HI_type_node, NULL_TREE);
16530 tree void_ftype_void
16531 = build_function_type_list (void_type_node, NULL_TREE);
16532 tree void_ftype_int
16533 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16535 tree opaque_ftype_long_pcvoid
16536 = build_function_type_list (opaque_V4SI_type_node,
16537 long_integer_type_node, pcvoid_type_node,
16539 tree v16qi_ftype_long_pcvoid
16540 = build_function_type_list (V16QI_type_node,
16541 long_integer_type_node, pcvoid_type_node,
16543 tree v8hi_ftype_long_pcvoid
16544 = build_function_type_list (V8HI_type_node,
16545 long_integer_type_node, pcvoid_type_node,
16547 tree v4si_ftype_long_pcvoid
16548 = build_function_type_list (V4SI_type_node,
16549 long_integer_type_node, pcvoid_type_node,
16551 tree v4sf_ftype_long_pcvoid
16552 = build_function_type_list (V4SF_type_node,
16553 long_integer_type_node, pcvoid_type_node,
16555 tree v2df_ftype_long_pcvoid
16556 = build_function_type_list (V2DF_type_node,
16557 long_integer_type_node, pcvoid_type_node,
16559 tree v2di_ftype_long_pcvoid
16560 = build_function_type_list (V2DI_type_node,
16561 long_integer_type_node, pcvoid_type_node,
16563 tree v1ti_ftype_long_pcvoid
16564 = build_function_type_list (V1TI_type_node,
16565 long_integer_type_node, pcvoid_type_node,
16568 tree void_ftype_opaque_long_pvoid
16569 = build_function_type_list (void_type_node,
16570 opaque_V4SI_type_node, long_integer_type_node,
16571 pvoid_type_node, NULL_TREE);
16572 tree void_ftype_v4si_long_pvoid
16573 = build_function_type_list (void_type_node,
16574 V4SI_type_node, long_integer_type_node,
16575 pvoid_type_node, NULL_TREE);
16576 tree void_ftype_v16qi_long_pvoid
16577 = build_function_type_list (void_type_node,
16578 V16QI_type_node, long_integer_type_node,
16579 pvoid_type_node, NULL_TREE);
16581 tree void_ftype_v16qi_pvoid_long
16582 = build_function_type_list (void_type_node,
16583 V16QI_type_node, pvoid_type_node,
16584 long_integer_type_node, NULL_TREE);
16586 tree void_ftype_v8hi_long_pvoid
16587 = build_function_type_list (void_type_node,
16588 V8HI_type_node, long_integer_type_node,
16589 pvoid_type_node, NULL_TREE);
16590 tree void_ftype_v4sf_long_pvoid
16591 = build_function_type_list (void_type_node,
16592 V4SF_type_node, long_integer_type_node,
16593 pvoid_type_node, NULL_TREE);
16594 tree void_ftype_v2df_long_pvoid
16595 = build_function_type_list (void_type_node,
16596 V2DF_type_node, long_integer_type_node,
16597 pvoid_type_node, NULL_TREE);
16598 tree void_ftype_v1ti_long_pvoid
16599 = build_function_type_list (void_type_node,
16600 V1TI_type_node, long_integer_type_node,
16601 pvoid_type_node, NULL_TREE);
16602 tree void_ftype_v2di_long_pvoid
16603 = build_function_type_list (void_type_node,
16604 V2DI_type_node, long_integer_type_node,
16605 pvoid_type_node, NULL_TREE);
16606 tree int_ftype_int_v8hi_v8hi
16607 = build_function_type_list (integer_type_node,
16608 integer_type_node, V8HI_type_node,
16609 V8HI_type_node, NULL_TREE);
16610 tree int_ftype_int_v16qi_v16qi
16611 = build_function_type_list (integer_type_node,
16612 integer_type_node, V16QI_type_node,
16613 V16QI_type_node, NULL_TREE);
16614 tree int_ftype_int_v4sf_v4sf
16615 = build_function_type_list (integer_type_node,
16616 integer_type_node, V4SF_type_node,
16617 V4SF_type_node, NULL_TREE);
16618 tree int_ftype_int_v2df_v2df
16619 = build_function_type_list (integer_type_node,
16620 integer_type_node, V2DF_type_node,
16621 V2DF_type_node, NULL_TREE);
16622 tree v2di_ftype_v2di
16623 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16624 tree v4si_ftype_v4si
16625 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16626 tree v8hi_ftype_v8hi
16627 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16628 tree v16qi_ftype_v16qi
16629 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16630 tree v4sf_ftype_v4sf
16631 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16632 tree v2df_ftype_v2df
16633 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16634 tree void_ftype_pcvoid_int_int
16635 = build_function_type_list (void_type_node,
16636 pcvoid_type_node, integer_type_node,
16637 integer_type_node, NULL_TREE);
16639 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16640 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16641 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16642 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16643 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16644 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16645 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16646 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16647 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16648 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16649 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16650 ALTIVEC_BUILTIN_LVXL_V2DF);
16651 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16652 ALTIVEC_BUILTIN_LVXL_V2DI);
16653 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16654 ALTIVEC_BUILTIN_LVXL_V4SF);
16655 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16656 ALTIVEC_BUILTIN_LVXL_V4SI);
16657 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16658 ALTIVEC_BUILTIN_LVXL_V8HI);
16659 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16660 ALTIVEC_BUILTIN_LVXL_V16QI);
16661 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16662 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16663 ALTIVEC_BUILTIN_LVX_V1TI);
16664 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16665 ALTIVEC_BUILTIN_LVX_V2DF);
16666 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16667 ALTIVEC_BUILTIN_LVX_V2DI);
16668 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16669 ALTIVEC_BUILTIN_LVX_V4SF);
16670 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16671 ALTIVEC_BUILTIN_LVX_V4SI);
16672 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16673 ALTIVEC_BUILTIN_LVX_V8HI);
16674 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16675 ALTIVEC_BUILTIN_LVX_V16QI);
16676 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16677 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16678 ALTIVEC_BUILTIN_STVX_V2DF);
16679 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16680 ALTIVEC_BUILTIN_STVX_V2DI);
16681 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16682 ALTIVEC_BUILTIN_STVX_V4SF);
16683 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16684 ALTIVEC_BUILTIN_STVX_V4SI);
16685 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16686 ALTIVEC_BUILTIN_STVX_V8HI);
16687 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16688 ALTIVEC_BUILTIN_STVX_V16QI);
16689 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16690 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16691 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16692 ALTIVEC_BUILTIN_STVXL_V2DF);
16693 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16694 ALTIVEC_BUILTIN_STVXL_V2DI);
16695 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16696 ALTIVEC_BUILTIN_STVXL_V4SF);
16697 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16698 ALTIVEC_BUILTIN_STVXL_V4SI);
16699 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16700 ALTIVEC_BUILTIN_STVXL_V8HI);
16701 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16702 ALTIVEC_BUILTIN_STVXL_V16QI);
16703 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16704 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16705 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16706 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16707 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16708 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16709 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16710 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16711 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16712 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16713 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16714 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16715 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16716 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16717 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16718 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16720 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16721 VSX_BUILTIN_LXVD2X_V2DF);
16722 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16723 VSX_BUILTIN_LXVD2X_V2DI);
16724 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16725 VSX_BUILTIN_LXVW4X_V4SF);
16726 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16727 VSX_BUILTIN_LXVW4X_V4SI);
16728 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16729 VSX_BUILTIN_LXVW4X_V8HI);
16730 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16731 VSX_BUILTIN_LXVW4X_V16QI);
16732 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16733 VSX_BUILTIN_STXVD2X_V2DF);
16734 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16735 VSX_BUILTIN_STXVD2X_V2DI);
16736 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16737 VSX_BUILTIN_STXVW4X_V4SF);
16738 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16739 VSX_BUILTIN_STXVW4X_V4SI);
16740 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16741 VSX_BUILTIN_STXVW4X_V8HI);
16742 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16743 VSX_BUILTIN_STXVW4X_V16QI);
16745 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16746 VSX_BUILTIN_LD_ELEMREV_V2DF);
16747 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16748 VSX_BUILTIN_LD_ELEMREV_V2DI);
16749 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16750 VSX_BUILTIN_LD_ELEMREV_V4SF);
16751 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16752 VSX_BUILTIN_LD_ELEMREV_V4SI);
16753 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16754 VSX_BUILTIN_LD_ELEMREV_V8HI);
16755 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16756 VSX_BUILTIN_LD_ELEMREV_V16QI);
16757 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16758 VSX_BUILTIN_ST_ELEMREV_V2DF);
16759 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16760 VSX_BUILTIN_ST_ELEMREV_V1TI);
16761 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16762 VSX_BUILTIN_ST_ELEMREV_V2DI);
16763 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16764 VSX_BUILTIN_ST_ELEMREV_V4SF);
16765 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16766 VSX_BUILTIN_ST_ELEMREV_V4SI);
16767 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16768 VSX_BUILTIN_ST_ELEMREV_V8HI);
16769 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16770 VSX_BUILTIN_ST_ELEMREV_V16QI);
16772 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16773 VSX_BUILTIN_VEC_LD);
16774 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16775 VSX_BUILTIN_VEC_ST);
16776 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16777 VSX_BUILTIN_VEC_XL);
16778 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16779 VSX_BUILTIN_VEC_XL_BE);
16780 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16781 VSX_BUILTIN_VEC_XST);
16782 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16783 VSX_BUILTIN_VEC_XST_BE);
16785 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16786 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16787 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16789 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16790 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16791 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16792 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16793 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16794 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16795 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16796 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16797 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16798 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16799 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16800 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16802 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16803 ALTIVEC_BUILTIN_VEC_ADDE);
16804 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16805 ALTIVEC_BUILTIN_VEC_ADDEC);
16806 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16807 ALTIVEC_BUILTIN_VEC_CMPNE);
16808 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16809 ALTIVEC_BUILTIN_VEC_MUL);
16810 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16811 ALTIVEC_BUILTIN_VEC_SUBE);
16812 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16813 ALTIVEC_BUILTIN_VEC_SUBEC);
16815 /* Cell builtins. */
16816 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16817 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16818 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16819 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16821 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16822 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16823 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16824 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16826 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16827 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16828 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16829 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16831 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16832 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16833 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16834 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16836 if (TARGET_P9_VECTOR)
16838 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16839 P9V_BUILTIN_STXVL);
16840 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16841 P9V_BUILTIN_XST_LEN_R);
16844 /* Add the DST variants. */
16846 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16848 HOST_WIDE_INT mask = d->mask;
16850 /* It is expected that these dst built-in functions may have
16851 d->icode equal to CODE_FOR_nothing. */
16852 if ((mask & builtin_mask) != mask)
16854 if (TARGET_DEBUG_BUILTIN)
16855 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16859 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16862 /* Initialize the predicates. */
16863 d = bdesc_altivec_preds;
16864 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16866 machine_mode mode1;
16868 HOST_WIDE_INT mask = d->mask;
16870 if ((mask & builtin_mask) != mask)
16872 if (TARGET_DEBUG_BUILTIN)
16873 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16878 if (rs6000_overloaded_builtin_p (d->code))
16882 /* Cannot define builtin if the instruction is disabled. */
16883 gcc_assert (d->icode != CODE_FOR_nothing);
16884 mode1 = insn_data[d->icode].operand[1].mode;
16890 type = int_ftype_int_opaque_opaque;
16893 type = int_ftype_int_v2di_v2di;
16896 type = int_ftype_int_v4si_v4si;
16899 type = int_ftype_int_v8hi_v8hi;
16902 type = int_ftype_int_v16qi_v16qi;
16905 type = int_ftype_int_v4sf_v4sf;
16908 type = int_ftype_int_v2df_v2df;
16911 gcc_unreachable ();
16914 def_builtin (d->name, type, d->code);
16917 /* Initialize the abs* operators. */
16919 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16921 machine_mode mode0;
16923 HOST_WIDE_INT mask = d->mask;
16925 if ((mask & builtin_mask) != mask)
16927 if (TARGET_DEBUG_BUILTIN)
16928 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
16933 /* Cannot define builtin if the instruction is disabled. */
16934 gcc_assert (d->icode != CODE_FOR_nothing);
16935 mode0 = insn_data[d->icode].operand[0].mode;
16940 type = v2di_ftype_v2di;
16943 type = v4si_ftype_v4si;
16946 type = v8hi_ftype_v8hi;
16949 type = v16qi_ftype_v16qi;
16952 type = v4sf_ftype_v4sf;
16955 type = v2df_ftype_v2df;
16958 gcc_unreachable ();
16961 def_builtin (d->name, type, d->code);
16964 /* Initialize target builtin that implements
16965 targetm.vectorize.builtin_mask_for_load. */
16967 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16968 v16qi_ftype_long_pcvoid,
16969 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16970 BUILT_IN_MD, NULL, NULL_TREE);
16971 TREE_READONLY (decl) = 1;
16972 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16973 altivec_builtin_mask_for_load = decl;
16975 /* Access to the vec_init patterns. */
16976 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16977 integer_type_node, integer_type_node,
16978 integer_type_node, NULL_TREE);
16979 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16981 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16982 short_integer_type_node,
16983 short_integer_type_node,
16984 short_integer_type_node,
16985 short_integer_type_node,
16986 short_integer_type_node,
16987 short_integer_type_node,
16988 short_integer_type_node, NULL_TREE);
16989 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16991 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16992 char_type_node, char_type_node,
16993 char_type_node, char_type_node,
16994 char_type_node, char_type_node,
16995 char_type_node, char_type_node,
16996 char_type_node, char_type_node,
16997 char_type_node, char_type_node,
16998 char_type_node, char_type_node,
16999 char_type_node, NULL_TREE);
17000 def_builtin ("__builtin_vec_init_v16qi", ftype,
17001 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17003 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17004 float_type_node, float_type_node,
17005 float_type_node, NULL_TREE);
17006 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17008 /* VSX builtins. */
17009 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17010 double_type_node, NULL_TREE);
17011 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17013 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17014 intDI_type_node, NULL_TREE);
17015 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17017 /* Access to the vec_set patterns. */
17018 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17020 integer_type_node, NULL_TREE);
17021 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17023 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17025 integer_type_node, NULL_TREE);
17026 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17028 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17030 integer_type_node, NULL_TREE);
17031 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17033 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17035 integer_type_node, NULL_TREE);
17036 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17038 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17040 integer_type_node, NULL_TREE);
17041 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17043 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17045 integer_type_node, NULL_TREE);
17046 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17048 /* Access to the vec_extract patterns. */
17049 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17050 integer_type_node, NULL_TREE);
17051 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17053 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17054 integer_type_node, NULL_TREE);
17055 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17057 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17058 integer_type_node, NULL_TREE);
17059 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17061 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17062 integer_type_node, NULL_TREE);
17063 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17065 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17066 integer_type_node, NULL_TREE);
17067 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17069 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17070 integer_type_node, NULL_TREE);
17071 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17074 if (V1TI_type_node)
17076 tree v1ti_ftype_long_pcvoid
17077 = build_function_type_list (V1TI_type_node,
17078 long_integer_type_node, pcvoid_type_node,
17080 tree void_ftype_v1ti_long_pvoid
17081 = build_function_type_list (void_type_node,
17082 V1TI_type_node, long_integer_type_node,
17083 pvoid_type_node, NULL_TREE);
17084 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17085 VSX_BUILTIN_LD_ELEMREV_V1TI);
17086 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17087 VSX_BUILTIN_LXVD2X_V1TI);
17088 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17089 VSX_BUILTIN_STXVD2X_V1TI);
17090 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17091 NULL_TREE, NULL_TREE);
17092 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17093 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17095 integer_type_node, NULL_TREE);
17096 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17097 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17098 integer_type_node, NULL_TREE);
17099 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
/* Register the HTM (Hardware Transactional Memory) built-in functions
   described by the bdesc_htm table.
   NOTE(review): this chunk is a lossy extraction -- the embedded
   original line numbers are non-contiguous (the leading return-type
   line, braces, the nopnds switch header and several other lines are
   missing), so the comments below describe only the visible logic.  */
17105 htm_init_builtins (void)
17107 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17108 const struct builtin_description *d;
/* Walk every HTM builtin descriptor, compute its function type, and
   register those whose required target features are enabled.  */
17112 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17114 tree op[MAX_HTM_OPERANDS], type;
17115 HOST_WIDE_INT mask = d->mask;
17116 unsigned attr = rs6000_builtin_info[d->code].attr;
17117 bool void_func = (attr & RS6000_BTC_VOID);
17118 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17120 tree gpr_type_node;
17124 /* It is expected that these htm built-in functions may have
17125 d->icode equal to CODE_FOR_nothing. */
/* GPR-sized type: 64-bit when generating 32-bit code for a 64-bit
   CPU (-m32 -mpowerpc64), otherwise plain unsigned long.  */
17127 if (TARGET_32BIT && TARGET_POWERPC64)
17128 gpr_type_node = long_long_unsigned_type_node;
17130 gpr_type_node = long_unsigned_type_node;
/* SPR-accessing builtins take and return GPR-sized values; the
   TABORTDC/TABORTDCI pair take GPR-sized args but return unsigned;
   everything else is plain unsigned in and out.  */
17132 if (attr & RS6000_BTC_SPR)
17134 rettype = gpr_type_node;
17135 argtype = gpr_type_node;
17137 else if (d->code == HTM_BUILTIN_TABORTDC
17138 || d->code == HTM_BUILTIN_TABORTDCI)
17140 rettype = unsigned_type_node;
17141 argtype = gpr_type_node;
17145 rettype = unsigned_type_node;
17146 argtype = unsigned_type_node;
/* Skip builtins whose required feature bits are not all present in
   the current builtin mask (debug note when -mdebug=builtin).  */
17149 if ((mask & builtin_mask) != mask)
17151 if (TARGET_DEBUG_BUILTIN)
17152 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
/* Also skip descriptors that have no name.  */
17158 if (TARGET_DEBUG_BUILTIN)
17159 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17160 (long unsigned) i);
/* op[0] is the return type; void builtins return void regardless of
   the rettype computed above.  */
17164 op[nopnds++] = (void_func) ? void_type_node : rettype;
/* Append one argtype per operand according to the builtin's arity
   bits.  */
17166 if (attr_args == RS6000_BTC_UNARY)
17167 op[nopnds++] = argtype;
17168 else if (attr_args == RS6000_BTC_BINARY)
17170 op[nopnds++] = argtype;
17171 op[nopnds++] = argtype;
17173 else if (attr_args == RS6000_BTC_TERNARY)
17175 op[nopnds++] = argtype;
17176 op[nopnds++] = argtype;
17177 op[nopnds++] = argtype;
/* Build the FUNCTION_TYPE from the collected types -- presumably
   dispatched on nopnds; the switch header and case labels are not
   visible in this extraction.  */
17183 type = build_function_type_list (op[0], NULL_TREE);
17186 type = build_function_type_list (op[0], op[1], NULL_TREE);
17189 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17192 type = build_function_type_list (op[0], op[1], op[2], op[3],
17196 gcc_unreachable ();
/* Finally register the builtin under its table name and code.  */
17199 def_builtin (d->name, type, d->code);
17203 /* Hash function for builtin functions with up to 3 arguments and a return
/* Fold the four machine modes and four signedness flags of a
   builtin_hash_struct into one hash value.  Multiplying by
   MAX_MACHINE_MODE before adding each mode keeps distinct modes from
   colliding within a single mixing step.
   NOTE(review): the return-type line, the "ret" initialization and
   the final return are missing from this lossy extraction (embedded
   line numbers jump 17206 -> 17211).  */
17206 builtin_hasher::hash (builtin_hash_struct *bh)
17211 for (i = 0; i < 4; i++)
17213 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17214 ret = (ret * 2) + bh->uns_p[i];
17220 /* Compare builtin hash entries H1 and H2 for equivalence. */
/* Two entries are equal iff all four modes and all four signedness
   flags match; the cached "type" tree is deliberately not compared,
   since it is derived from the modes/flags.  */
17222 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17224 return ((p1->mode[0] == p2->mode[0])
17225 && (p1->mode[1] == p2->mode[1])
17226 && (p1->mode[2] == p2->mode[2])
17227 && (p1->mode[3] == p2->mode[3])
17228 && (p1->uns_p[0] == p2->uns_p[0])
17229 && (p1->uns_p[1] == p2->uns_p[1])
17230 && (p1->uns_p[2] == p2->uns_p[2])
17231 && (p1->uns_p[3] == p2->uns_p[3]));
17234 /* Map types for builtin functions with an explicit return type and up to 3
17235 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17236 of the argument. */
/* NOTE(review): this chunk is a lossy extraction -- the embedded
   original line numbers show gaps (the "static tree" line, the
   "switch (builtin)" header, the "h.uns_p[...] = 1;" assignments and
   "break;"s inside the case groups, and several closing braces are
   missing).  Comments added below describe only what is visible.  */
17238 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17239 machine_mode mode_arg1, machine_mode mode_arg2,
17240 enum rs6000_builtins builtin, const char *name)
17242 struct builtin_hash_struct h;
17243 struct builtin_hash_struct *h2;
17246 tree ret_type = NULL_TREE;
17247 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17249 /* Create builtin_hash_table. */
17250 if (builtin_hash_table == NULL)
17251 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
/* Key the memoization table on the four modes: return value plus up
   to three argument modes (VOIDmode for absent arguments).  */
17253 h.type = NULL_TREE;
17254 h.mode[0] = mode_ret;
17255 h.mode[1] = mode_arg0;
17256 h.mode[2] = mode_arg1;
17257 h.mode[3] = mode_arg2;
17263 /* If the builtin is a type that produces unsigned results or takes unsigned
17264 arguments, and it is returned as a decl for the vectorizer (such as
17265 widening multiplies, permute), make sure the arguments and return value
17266 are type correct. */
/* Presumably each case group below sets the corresponding h.uns_p[]
   flags; those assignment lines are not visible in this extraction.  */
17269 /* unsigned 1 argument functions. */
17270 case CRYPTO_BUILTIN_VSBOX:
17271 case CRYPTO_BUILTIN_VSBOX_BE:
17272 case P8V_BUILTIN_VGBBD:
17273 case MISC_BUILTIN_CDTBCD:
17274 case MISC_BUILTIN_CBCDTD:
17279 /* unsigned 2 argument functions. */
17280 case ALTIVEC_BUILTIN_VMULEUB:
17281 case ALTIVEC_BUILTIN_VMULEUH:
17282 case P8V_BUILTIN_VMULEUW:
17283 case ALTIVEC_BUILTIN_VMULOUB:
17284 case ALTIVEC_BUILTIN_VMULOUH:
17285 case P8V_BUILTIN_VMULOUW:
17286 case CRYPTO_BUILTIN_VCIPHER:
17287 case CRYPTO_BUILTIN_VCIPHER_BE:
17288 case CRYPTO_BUILTIN_VCIPHERLAST:
17289 case CRYPTO_BUILTIN_VCIPHERLAST_BE:
17290 case CRYPTO_BUILTIN_VNCIPHER:
17291 case CRYPTO_BUILTIN_VNCIPHER_BE:
17292 case CRYPTO_BUILTIN_VNCIPHERLAST:
17293 case CRYPTO_BUILTIN_VNCIPHERLAST_BE:
17294 case CRYPTO_BUILTIN_VPMSUMB:
17295 case CRYPTO_BUILTIN_VPMSUMH:
17296 case CRYPTO_BUILTIN_VPMSUMW:
17297 case CRYPTO_BUILTIN_VPMSUMD:
17298 case CRYPTO_BUILTIN_VPMSUM:
17299 case MISC_BUILTIN_ADDG6S:
17300 case MISC_BUILTIN_DIVWEU:
17301 case MISC_BUILTIN_DIVDEU:
17302 case VSX_BUILTIN_UDIV_V2DI:
17303 case ALTIVEC_BUILTIN_VMAXUB:
17304 case ALTIVEC_BUILTIN_VMINUB:
17305 case ALTIVEC_BUILTIN_VMAXUH:
17306 case ALTIVEC_BUILTIN_VMINUH:
17307 case ALTIVEC_BUILTIN_VMAXUW:
17308 case ALTIVEC_BUILTIN_VMINUW:
17309 case P8V_BUILTIN_VMAXUD:
17310 case P8V_BUILTIN_VMINUD:
17316 /* unsigned 3 argument functions. */
17317 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17318 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17319 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17320 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17321 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17322 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17323 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17324 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17325 case VSX_BUILTIN_VPERM_16QI_UNS:
17326 case VSX_BUILTIN_VPERM_8HI_UNS:
17327 case VSX_BUILTIN_VPERM_4SI_UNS:
17328 case VSX_BUILTIN_VPERM_2DI_UNS:
17329 case VSX_BUILTIN_XXSEL_16QI_UNS:
17330 case VSX_BUILTIN_XXSEL_8HI_UNS:
17331 case VSX_BUILTIN_XXSEL_4SI_UNS:
17332 case VSX_BUILTIN_XXSEL_2DI_UNS:
17333 case CRYPTO_BUILTIN_VPERMXOR:
17334 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17335 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17336 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17337 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17338 case CRYPTO_BUILTIN_VSHASIGMAW:
17339 case CRYPTO_BUILTIN_VSHASIGMAD:
17340 case CRYPTO_BUILTIN_VSHASIGMA:
17347 /* signed permute functions with unsigned char mask. */
17348 case ALTIVEC_BUILTIN_VPERM_16QI:
17349 case ALTIVEC_BUILTIN_VPERM_8HI:
17350 case ALTIVEC_BUILTIN_VPERM_4SI:
17351 case ALTIVEC_BUILTIN_VPERM_4SF:
17352 case ALTIVEC_BUILTIN_VPERM_2DI:
17353 case ALTIVEC_BUILTIN_VPERM_2DF:
17354 case VSX_BUILTIN_VPERM_16QI:
17355 case VSX_BUILTIN_VPERM_8HI:
17356 case VSX_BUILTIN_VPERM_4SI:
17357 case VSX_BUILTIN_VPERM_4SF:
17358 case VSX_BUILTIN_VPERM_2DI:
17359 case VSX_BUILTIN_VPERM_2DF:
17363 /* unsigned args, signed return. */
17364 case VSX_BUILTIN_XVCVUXDSP:
17365 case VSX_BUILTIN_XVCVUXDDP_UNS:
17366 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17370 /* signed args, unsigned return. */
17371 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17372 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17373 case MISC_BUILTIN_UNPACK_TD:
17374 case MISC_BUILTIN_UNPACK_V1TI:
17378 /* unsigned arguments, bool return (compares). */
17379 case ALTIVEC_BUILTIN_VCMPEQUB:
17380 case ALTIVEC_BUILTIN_VCMPEQUH:
17381 case ALTIVEC_BUILTIN_VCMPEQUW:
17382 case P8V_BUILTIN_VCMPEQUD:
17383 case VSX_BUILTIN_CMPGE_U16QI:
17384 case VSX_BUILTIN_CMPGE_U8HI:
17385 case VSX_BUILTIN_CMPGE_U4SI:
17386 case VSX_BUILTIN_CMPGE_U2DI:
17387 case ALTIVEC_BUILTIN_VCMPGTUB:
17388 case ALTIVEC_BUILTIN_VCMPGTUH:
17389 case ALTIVEC_BUILTIN_VCMPGTUW:
17390 case P8V_BUILTIN_VCMPGTUD:
17395 /* unsigned arguments for 128-bit pack instructions. */
17396 case MISC_BUILTIN_PACK_TD:
17397 case MISC_BUILTIN_PACK_V1TI:
17402 /* unsigned second arguments (vector shift right). */
17403 case ALTIVEC_BUILTIN_VSRB:
17404 case ALTIVEC_BUILTIN_VSRH:
17405 case ALTIVEC_BUILTIN_VSRW:
17406 case P8V_BUILTIN_VSRD:
17414 /* Figure out how many args are present. */
/* Trailing VOIDmode slots are not real arguments; presumably
   num_args is decremented in the loop body (line not visible).  */
17415 while (num_args > 0 && h.mode[num_args] == VOIDmode)
/* Map each (mode, signedness) pair to a tree type, falling back to
   the signed variant when no unsigned type exists for the mode; a
   missing mapping is a fatal internal error.  */
17418 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17419 if (!ret_type && h.uns_p[0])
17420 ret_type = builtin_mode_to_type[h.mode[0]][0];
17423 fatal_error (input_location,
17424 "internal error: builtin function %qs had an unexpected "
17425 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17427 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17428 arg_type[i] = NULL_TREE;
17430 for (i = 0; i < num_args; i++)
17432 int m = (int) h.mode[i+1];
17433 int uns_p = h.uns_p[i+1];
17435 arg_type[i] = builtin_mode_to_type[m][uns_p];
17436 if (!arg_type[i] && uns_p)
17437 arg_type[i] = builtin_mode_to_type[m][0];
17440 fatal_error (input_location,
17441 "internal error: builtin function %qs, argument %d "
17442 "had unexpected argument type %qs", name, i,
17443 GET_MODE_NAME (m));
/* Memoize: reuse the cached FUNCTION_TYPE for an identical
   signature, otherwise build one and store it in a GC-allocated
   table entry.  */
17446 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17447 if (*found == NULL)
17449 h2 = ggc_alloc<builtin_hash_struct> ();
17453 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17454 arg_type[2], NULL_TREE);
17457 return (*found)->type;
/* Register the "common" Altivec/VSX built-in functions.  Walks the
   bdesc_3arg, bdesc_2arg, bdesc_1arg and bdesc_0arg description tables;
   entries whose feature-mask bits are not all present in builtin_mask
   are skipped (with an optional stderr trace under -mdebug=builtin).
   Overloaded builtins are given opaque V4SI-based function types so
   they can be resolved later; ordinary builtins derive their type from
   the insn's operand modes via builtin_function_type.
   NOTE(review): this listing is elided (embedded line numbers jump);
   confirm exact control flow against the complete source.  */
17461 rs6000_common_init_builtins (void)
17463 const struct builtin_description *d;
17466 tree opaque_ftype_opaque = NULL_TREE;
17467 tree opaque_ftype_opaque_opaque = NULL_TREE;
17468 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17469 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17471 /* Create Altivec and VSX builtins on machines with at least the
17472 general purpose extensions (970 and newer) to allow the use of
17473 the target attribute. */
17475 if (TARGET_EXTRA_BUILTINS)
17476 builtin_mask |= RS6000_BTM_COMMON;
17478 /* Add the ternary operators. */
17480 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17483 HOST_WIDE_INT mask = d->mask;
/* Skip entries not enabled by the current builtin mask.  */
17485 if ((mask & builtin_mask) != mask)
17487 if (TARGET_DEBUG_BUILTIN)
17488 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
/* Overloaded builtins share one lazily-built opaque 3-arg type.  */
17492 if (rs6000_overloaded_builtin_p (d->code))
17494 if (! (type = opaque_ftype_opaque_opaque_opaque))
17495 type = opaque_ftype_opaque_opaque_opaque
17496 = build_function_type_list (opaque_V4SI_type_node,
17497 opaque_V4SI_type_node,
17498 opaque_V4SI_type_node,
17499 opaque_V4SI_type_node,
17504 enum insn_code icode = d->icode;
17507 if (TARGET_DEBUG_BUILTIN)
17508 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17514 if (icode == CODE_FOR_nothing)
17516 if (TARGET_DEBUG_BUILTIN)
17517 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
/* Non-overloaded: derive the function type from the insn pattern's
   operand modes (result + three inputs).  */
17523 type = builtin_function_type (insn_data[icode].operand[0].mode,
17524 insn_data[icode].operand[1].mode,
17525 insn_data[icode].operand[2].mode,
17526 insn_data[icode].operand[3].mode,
17530 def_builtin (d->name, type, d->code);
17533 /* Add the binary operators. */
17535 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17537 machine_mode mode0, mode1, mode2;
17539 HOST_WIDE_INT mask = d->mask;
17541 if ((mask & builtin_mask) != mask)
17543 if (TARGET_DEBUG_BUILTIN)
17544 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17548 if (rs6000_overloaded_builtin_p (d->code))
17550 if (! (type = opaque_ftype_opaque_opaque))
17551 type = opaque_ftype_opaque_opaque
17552 = build_function_type_list (opaque_V4SI_type_node,
17553 opaque_V4SI_type_node,
17554 opaque_V4SI_type_node,
17559 enum insn_code icode = d->icode;
17562 if (TARGET_DEBUG_BUILTIN)
17563 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17569 if (icode == CODE_FOR_nothing)
17571 if (TARGET_DEBUG_BUILTIN)
17572 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17578 mode0 = insn_data[icode].operand[0].mode;
17579 mode1 = insn_data[icode].operand[1].mode;
17580 mode2 = insn_data[icode].operand[2].mode;
17582 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17586 def_builtin (d->name, type, d->code);
17589 /* Add the simple unary operators. */
17591 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17593 machine_mode mode0, mode1;
17595 HOST_WIDE_INT mask = d->mask;
17597 if ((mask & builtin_mask) != mask)
17599 if (TARGET_DEBUG_BUILTIN)
17600 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17604 if (rs6000_overloaded_builtin_p (d->code))
17606 if (! (type = opaque_ftype_opaque))
17607 type = opaque_ftype_opaque
17608 = build_function_type_list (opaque_V4SI_type_node,
17609 opaque_V4SI_type_node,
17614 enum insn_code icode = d->icode;
17617 if (TARGET_DEBUG_BUILTIN)
17618 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17624 if (icode == CODE_FOR_nothing)
17626 if (TARGET_DEBUG_BUILTIN)
17627 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17633 mode0 = insn_data[icode].operand[0].mode;
17634 mode1 = insn_data[icode].operand[1].mode;
17636 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17640 def_builtin (d->name, type, d->code);
17643 /* Add the simple no-argument operators. */
17645 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17647 machine_mode mode0;
17649 HOST_WIDE_INT mask = d->mask;
17651 if ((mask & builtin_mask) != mask)
17653 if (TARGET_DEBUG_BUILTIN)
17654 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17657 if (rs6000_overloaded_builtin_p (d->code))
17659 if (!opaque_ftype_opaque)
17660 opaque_ftype_opaque
17661 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17662 type = opaque_ftype_opaque;
17666 enum insn_code icode = d->icode;
17669 if (TARGET_DEBUG_BUILTIN)
17670 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17671 (long unsigned) i);
17674 if (icode == CODE_FOR_nothing)
17676 if (TARGET_DEBUG_BUILTIN)
17678 "rs6000_builtin, skip no-argument %s (no code)\n",
17682 mode0 = insn_data[icode].operand[0].mode;
17683 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17686 def_builtin (d->name, type, d->code);
17690 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
/* MODE is a 128-bit IBM extended-double mode (IFmode, or TFmode when
   long double is IBM format).  Registers the libcall names used for
   arithmetic, comparison, and conversion optabs in that mode:
   __gcc_q* for the normal case, _xlq* under -mxl-compat, with extra
   soft-float comparison/conversion entries and, for IFmode, DFP and
   (on 64-bit) TImode conversions.
   NOTE(review): listing is elided; branch bodies may contain lines not
   shown here — verify against the full source.  */
17692 init_float128_ibm (machine_mode mode)
17694 if (!TARGET_XL_COMPAT)
17696 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17697 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17698 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17699 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
/* Soft-float only: negation, comparisons and SF/DF/SI conversions go
   through libcalls as well.  */
17701 if (!TARGET_HARD_FLOAT)
17703 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17704 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17705 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17706 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17707 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17708 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17709 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17710 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17712 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17713 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17714 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17715 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17716 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17717 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17718 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17719 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
/* -mxl-compat: use the IBM XL compiler's runtime names instead.  */
17724 set_optab_libfunc (add_optab, mode, "_xlqadd");
17725 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17726 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17727 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17730 /* Add various conversions for IFmode to use the traditional TFmode
17732 if (mode == IFmode)
/* Decimal float <-> IBM extended conversions keep the historical
   __dpd_*tf names.  */
17734 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17735 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17736 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17737 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17738 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17739 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
/* 128-bit integer conversions are only available on 64-bit.  */
17741 if (TARGET_POWERPC64)
17743 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17744 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17745 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17746 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17751 /* Create a decl for either complex long double multiply or complex long double
17752 divide when long double is IEEE 128-bit floating point. We can't use
17753 __multc3 and __divtc3 because the original long double using IBM extended
17754 double used those names. The complex multiply/divide functions are encoded
17755 as builtin functions with a complex result and 4 scalar inputs. */
/* NAME is the libcall name (e.g. "__mulkc3"), FNCODE the built_in_function
   slot to fill, FNTYPE the function type (complex result, 4 scalar args).
   Registers the decl via add_builtin_function and records it with
   set_builtin_decl so the middle end finds it.  */
17758 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17760 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17763 set_builtin_decl (fncode, fndecl, true);
17765 if (TARGET_DEBUG_BUILTIN)
17766 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17771 /* Set up IEEE 128-bit floating point routines. Use different names if the
17772 arguments can be passed in a vector register. The historical PowerPC
17773 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17774 continue to use that if we aren't using vector registers to pass IEEE
17775 128-bit floating point. */
/* MODE is an IEEE 128-bit float mode (KFmode, or TFmode when long double
   is IEEE).  When values travel in vector registers, register the __*kf*
   libcall names; otherwise fall back to the historical _q_* names.
   NOTE(review): listing is elided; verify branch structure against the
   full source.  */
17778 init_float128_ieee (machine_mode mode)
17780 if (FLOAT128_VECTOR_P (mode))
/* Guard so the complex mul/div builtins are created only once even if
   target/clone attributes cause a second call.  */
17782 static bool complex_muldiv_init_p = false;
17784 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17785 we have clone or target attributes, this will be called a second
17786 time. We want to create the built-in function only once. */
17787 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17789 complex_muldiv_init_p = true;
/* Compute the builtin codes for complex TCmode multiply/divide from
   the generic BUILT_IN_COMPLEX_*_MIN bases.  */
17790 built_in_function fncode_mul =
17791 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17792 - MIN_MODE_COMPLEX_FLOAT);
17793 built_in_function fncode_div =
17794 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17795 - MIN_MODE_COMPLEX_FLOAT);
17797 tree fntype = build_function_type_list (complex_long_double_type_node,
17798 long_double_type_node,
17799 long_double_type_node,
17800 long_double_type_node,
17801 long_double_type_node,
17804 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17805 create_complex_muldiv ("__divkc3", fncode_div, fntype);
/* Arithmetic and comparison libcalls use the "kf" suffix.  */
17808 set_optab_libfunc (add_optab, mode, "__addkf3");
17809 set_optab_libfunc (sub_optab, mode, "__subkf3");
17810 set_optab_libfunc (neg_optab, mode, "__negkf2");
17811 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17812 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17813 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17814 set_optab_libfunc (abs_optab, mode, "__abskf2");
17815 set_optab_libfunc (powi_optab, mode, "__powikf2");
17817 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17818 set_optab_libfunc (ne_optab, mode, "__nekf2");
17819 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17820 set_optab_libfunc (ge_optab, mode, "__gekf2");
17821 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17822 set_optab_libfunc (le_optab, mode, "__lekf2");
17823 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17825 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17826 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17827 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17828 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
/* IBM-extended <-> IEEE-128 conversions; note "sext" here maps to a
   __trunctfkf2 name and vice versa because IBM double-double is not
   strictly wider or narrower than IEEE 128-bit.  */
17830 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17831 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17832 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17834 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17835 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17836 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
/* Decimal float conversions.  */
17838 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17839 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17840 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17841 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17842 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17843 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
/* Integer <-> IEEE-128 conversions.  */
17845 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17846 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17847 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17848 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17850 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17851 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17852 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17853 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17855 if (TARGET_POWERPC64)
17857 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17858 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17859 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17860 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
/* Non-vector ABI: historical _q_* names.  */
17866 set_optab_libfunc (add_optab, mode, "_q_add");
17867 set_optab_libfunc (sub_optab, mode, "_q_sub");
17868 set_optab_libfunc (neg_optab, mode, "_q_neg");
17869 set_optab_libfunc (smul_optab, mode, "_q_mul");
17870 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17871 if (TARGET_PPC_GPOPT)
17872 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17874 set_optab_libfunc (eq_optab, mode, "_q_feq");
17875 set_optab_libfunc (ne_optab, mode, "_q_fne");
17876 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17877 set_optab_libfunc (ge_optab, mode, "_q_fge");
17878 set_optab_libfunc (lt_optab, mode, "_q_flt");
17879 set_optab_libfunc (le_optab, mode, "_q_fle");
17881 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17882 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17883 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17884 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17885 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17886 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17887 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17888 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
/* Target hook: register all 128-bit floating point libcall names.
   Explicit __float128/__ibm128 types first (IFmode/KFmode), then the
   long double (TFmode) flavor matching -mabi={ibm,ieee}longdouble.  */
17893 rs6000_init_libfuncs (void)
17895 /* __float128 support. */
17896 if (TARGET_FLOAT128_TYPE)
17898 init_float128_ibm (IFmode);
17899 init_float128_ieee (KFmode);
17902 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17903 if (TARGET_LONG_DOUBLE_128)
17905 if (!TARGET_IEEEQUAD)
17906 init_float128_ibm (TFmode);
17908 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17910 init_float128_ieee (TFmode);
17914 /* Emit a potentially record-form instruction, setting DST from SRC.
17915 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17916 signed comparison of DST with zero. If DOT is 1, the generated RTL
17917 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17918 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17919 a separate COMPARE. */
17922 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
/* dot == 0: a plain move, no condition register involved.  */
17926 emit_move_insn (dst, src);
/* CCREG is not CR0: record forms only write CR0, so emit the move and
   an explicit COMPARE as two insns.  */
17930 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17932 emit_move_insn (dst, src);
17933 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
/* CR0 case: build a PARALLEL of the CC set plus either a clobber of
   DST (dot == 1, result unused) or a real SET of DST (dot == 2).  */
17937 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17940 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17941 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17945 rtx set = gen_rtx_SET (dst, src);
17946 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17951 /* A validation routine: say whether CODE, a condition code, and MODE
17952 match. The other alternatives either don't make sense or should
17953 never be generated. */
/* Asserts (checking-only) that comparison CODE is legal in CC mode MODE:
   signed codes are excluded from CCUNSmode, unsigned codes require it,
   unordered codes require CCFPmode, and CCEQmode only carries EQ/NE.  */
17956 validate_condition_mode (enum rtx_code code, machine_mode mode)
17958 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17959 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17960 && GET_MODE_CLASS (mode) == MODE_CC)
17962 /* These don't make sense. */
17963 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17964 || mode != CCUNSmode);
17966 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17967 || mode == CCUNSmode);
17969 gcc_assert (mode == CCFPmode
17970 || (code != ORDERED && code != UNORDERED
17971 && code != UNEQ && code != LTGT
17972 && code != UNGT && code != UNLT
17973 && code != UNGE && code != UNLE));
17975 /* These should never be generated except for
17976 flag_finite_math_only. */
17977 gcc_assert (mode != CCFPmode
17978 || flag_finite_math_only
17979 || (code != LE && code != GE
17980 && code != UNEQ && code != LTGT
17981 && code != UNGT && code != UNLT));
17983 /* These are invalid; the information is not there. */
17984 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17988 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17989 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17990 not zero, store there the bit offset (counted from the right) where
17991 the single stretch of 1 bits begins; and similarly for B, the bit
17992 offset where it ends. */
/* A valid mask is a single contiguous run of 1 bits, possibly wrapping
   around the word.  Uses val & -val style bit tricks plus exact_log2 to
   find the run's endpoints (nb/ne).
   NOTE(review): this listing is elided (the bit-isolation assignments
   between branches are not shown); verify against the full source.  */
17995 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17997 unsigned HOST_WIDE_INT val = INTVAL (mask);
17998 unsigned HOST_WIDE_INT bit;
18000 int n = GET_MODE_PRECISION (mode);
/* Only SImode (rlwinm) and DImode (rld*) masks exist.  */
18002 if (mode != DImode && mode != SImode)
/* Non-negative value: a non-wrapping run of ones.  */
18005 if (INTVAL (mask) >= 0)
18008 ne = exact_log2 (bit);
18009 nb = exact_log2 (val + bit);
/* All-ones is handled specially (val + 1 overflows to 0).  */
18011 else if (val + 1 == 0)
18020 nb = exact_log2 (bit);
18021 ne = exact_log2 (val + bit);
18026 ne = exact_log2 (bit);
18027 if (val + bit == 0)
/* Reject if either endpoint was not a power of two or is out of
   range for the mode.  */
18035 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18046 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18047 or rldicr instruction, to implement an AND with it in mode MODE. */
18050 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
/* First require a single contiguous run of ones (nb = high end,
   ne = low end, counted from the right).  */
18054 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18057 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
/* ne == 0 -> rldicl; nb == 63 -> rldicr; otherwise the run must fit in
   the low 32 bits so rlwinm can do it.  */
18059 if (mode == DImode)
18060 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18062 /* For SImode, rlwinm can do everything. */
18063 if (mode == SImode)
18064 return (nb < 32 && ne < 32);
18069 /* Return the instruction template for an AND with mask in mode MODE, with
18070 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* operands[2] must already satisfy rs6000_is_valid_and_mask; fills in
   operands[3] (and [4] for rlwinm) with the rotate-mask bit positions
   and returns the matching asm template string.  */
18073 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18077 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18078 gcc_unreachable ();
/* Run reaches bit 0: clear-left form (rldicl).  */
18080 if (mode == DImode && ne == 0)
18082 operands[3] = GEN_INT (63 - nb);
18084 return "rldicl. %0,%1,0,%3";
18085 return "rldicl %0,%1,0,%3";
/* Run reaches bit 63: clear-right form (rldicr).  */
18088 if (mode == DImode && nb == 63)
18090 operands[3] = GEN_INT (63 - ne);
18092 return "rldicr. %0,%1,0,%3";
18093 return "rldicr %0,%1,0,%3";
/* Run fits in the low 32 bits: 32-bit rotate-and-mask (rlwinm).  */
18096 if (nb < 32 && ne < 32)
18098 operands[3] = GEN_INT (31 - nb);
18099 operands[4] = GEN_INT (31 - ne);
18101 return "rlwinm. %0,%1,0,%3,%4";
18102 return "rlwinm %0,%1,0,%3,%4";
18105 gcc_unreachable ();
18108 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18109 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18110 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
/* NOTE(review): listing is elided (some normalization assignments and
   returns are not shown); verify the sh/code canonicalization against
   the full source.  */
18113 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18117 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18120 int n = GET_MODE_PRECISION (mode);
/* Constant shift amount must be in range; variable shifts are handled
   below (only rotates allow them).  */
18123 if (CONST_INT_P (XEXP (shift, 1)))
18125 sh = INTVAL (XEXP (shift, 1));
18126 if (sh < 0 || sh >= n)
18130 rtx_code code = GET_CODE (shift);
18132 /* Convert any shift by 0 to a rotate, to simplify below code. */
18136 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18137 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18139 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18145 /* DImode rotates need rld*. */
18146 if (mode == DImode && code == ROTATE)
18147 return (nb == 63 || ne == 0 || ne == sh)
18149 /* SImode rotates need rlw*. */
18150 if (mode == SImode && code == ROTATE)
18151 return (nb < 32 && ne < 32 && sh < 32);
18153 /* Wrap-around masks are only okay for rotates. */
18157 /* Variable shifts are only okay for rotates. */
18161 /* Don't allow ASHIFT if the mask is wrong for that. */
18162 if (code == ASHIFT && ne < sh)
18165 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18166 if the mask is wrong for that. */
18167 if (nb < 32 && ne < 32 && sh < 32
18168 && !(code == LSHIFTRT && nb >= 32 - sh))
18171 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18172 if the mask is wrong for that. */
18173 if (code == LSHIFTRT)
18175 if (nb == 63 || ne == 0 || ne == sh)
18176 return !(code == LSHIFTRT && nb >= sh);
18181 /* Return the instruction template for a shift with mask in mode MODE, with
18182 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* operands[2] is the shift amount (register or immediate; %I2 selects
   the immediate form of the mnemonic), operands[3] the mask, operands[4]
   the original shift rtx.  LSHIFTRT amounts are converted to the
   equivalent left-rotate count before printing.  */
18185 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18189 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18190 gcc_unreachable ();
/* Clear-left form.  */
18192 if (mode == DImode && ne == 0)
18194 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18195 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18196 operands[3] = GEN_INT (63 - nb);
18198 return "rld%I2cl. %0,%1,%2,%3";
18199 return "rld%I2cl %0,%1,%2,%3";
/* Clear-right form.  */
18202 if (mode == DImode && nb == 63)
18204 operands[3] = GEN_INT (63 - ne);
18206 return "rld%I2cr. %0,%1,%2,%3";
18207 return "rld%I2cr %0,%1,%2,%3";
/* rldic form: run's low end coincides with the (constant) shift.  */
18211 && GET_CODE (operands[4]) != LSHIFTRT
18212 && CONST_INT_P (operands[2])
18213 && ne == INTVAL (operands[2])
18215 operands[3] = GEN_INT (63 - nb);
18217 return "rld%I2c. %0,%1,%2,%3";
18218 return "rld%I2c %0,%1,%2,%3";
/* 32-bit rotate-and-mask form.  */
18221 if (nb < 32 && ne < 32)
18223 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18224 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18225 operands[3] = GEN_INT (31 - nb);
18226 operands[4] = GEN_INT (31 - ne);
18227 /* This insn can also be a 64-bit rotate with mask that really makes
18228 it just a shift right (with mask); the %h below are to adjust for
18229 that situation (shift count is >= 32 in that case). */
18231 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18232 return "rlw%I2nm %0,%1,%h2,%3,%4";
18235 gcc_unreachable ();
18238 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18239 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18240 ASHIFT, or LSHIFTRT) in mode MODE. */
/* Same structure as rs6000_is_valid_shift_mask, restricted to the two
   insert-with-mask instructions.
   NOTE(review): listing is elided; verify the canonicalization steps
   against the full source.  */
18243 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18247 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18250 int n = GET_MODE_PRECISION (mode);
18252 int sh = INTVAL (XEXP (shift, 1));
18253 if (sh < 0 || sh >= n)
18256 rtx_code code = GET_CODE (shift);
18258 /* Convert any shift by 0 to a rotate, to simplify below code. */
18262 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18263 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18265 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18271 /* DImode rotates need rldimi. */
18272 if (mode == DImode && code == ROTATE)
18275 /* SImode rotates need rlwimi. */
18276 if (mode == SImode && code == ROTATE)
18277 return (nb < 32 && ne < 32 && sh < 32);
18279 /* Wrap-around masks are only okay for rotates. */
18283 /* Don't allow ASHIFT if the mask is wrong for that. */
18284 if (code == ASHIFT && ne < sh)
18287 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18288 if the mask is wrong for that. */
18289 if (nb < 32 && ne < 32 && sh < 32
18290 && !(code == LSHIFTRT && nb >= 32 - sh))
18293 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18294 if the mask is wrong for that. */
18295 if (code == LSHIFTRT)
18298 return !(code == LSHIFTRT && nb >= sh);
18303 /* Return the instruction template for an insert with mask in mode MODE, with
18304 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* operands[2] is the shift amount, operands[3] the mask, operands[4]
   the original shift rtx.  Chooses rldimi when possible since rlwimi
   is cracked (splits into multiple internal ops) on some cores.  */
18307 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18311 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18312 gcc_unreachable ();
18314 /* Prefer rldimi because rlwimi is cracked. */
18315 if (TARGET_POWERPC64
18316 && (!dot || mode == DImode)
18317 && GET_CODE (operands[4]) != LSHIFTRT
18318 && ne == INTVAL (operands[2])
18320 operands[3] = GEN_INT (63 - nb);
18322 return "rldimi. %0,%1,%2,%3";
18323 return "rldimi %0,%1,%2,%3";
/* 32-bit insert: convert an LSHIFTRT amount to the equivalent
   left-rotate count, then emit rlwimi.  */
18326 if (nb < 32 && ne < 32)
18328 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18329 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18330 operands[3] = GEN_INT (31 - nb);
18331 operands[4] = GEN_INT (31 - ne);
18333 return "rlwimi. %0,%1,%2,%3,%4";
18334 return "rlwimi %0,%1,%2,%3,%4";
18337 gcc_unreachable ();
18340 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18341 using two machine instructions. */
18344 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18346 /* There are two kinds of AND we can handle with two insns:
18347 1) those we can do with two rl* insn;
18350 We do not handle that last case yet. */
18352 /* If there is just one stretch of ones, we can do it. */
18353 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18356 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18357 one insn, we can do the whole thing with two. */
/* bit1 = lowest set bit; bit2 = lowest bit of the hole above it;
   bit3 = lowest set bit above that hole.  Filling bits [bit2, bit3)
   yields the candidate single-insn mask.  */
18358 unsigned HOST_WIDE_INT val = INTVAL (c);
18359 unsigned HOST_WIDE_INT bit1 = val & -val;
18360 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18361 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18362 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18363 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18366 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18367 If EXPAND is true, split rotate-and-mask instructions we generate to
18368 their constituent parts as well (this is used during expand); if DOT
18369 is 1, make the last insn a record-form instruction clobbering the
18370 destination GPR and setting the CC reg (from operands[3]); if 2, set
18371 that GPR as well as the CC reg. */
/* NOTE(review): listing is elided (the expand/non-expand branch heads
   are not shown); verify branch structure against the full source.  */
18374 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
/* DOT and EXPAND are mutually exclusive: expansion happens before CC
   usage is decided.  */
18376 gcc_assert (!(expand && dot));
18378 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18380 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18381 shift right. This generates better code than doing the masks without
18382 shifts, or shifting first right and then left. */
18384 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18386 gcc_assert (mode == DImode);
18388 int shift = 63 - nb;
/* Expand form: three separate insns through fresh pseudos.  */
18391 rtx tmp1 = gen_reg_rtx (DImode);
18392 rtx tmp2 = gen_reg_rtx (DImode);
18393 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18394 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18395 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
/* Non-expand form: combined rotate-and-mask RTL, last insn possibly
   record-form via rs6000_emit_dot_insn.  */
18399 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18400 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18401 emit_move_insn (operands[0], tmp);
18402 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18403 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18408 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18409 that does the rest. */
18410 unsigned HOST_WIDE_INT bit1 = val & -val;
18411 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18412 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18413 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
/* mask1 keeps everything below the hole; mask2 is VAL with the lowest
   hole filled in (a single stretch of ones).  */
18415 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18416 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18418 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18420 /* Two "no-rotate"-and-mask instructions, for SImode. */
18421 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18423 gcc_assert (mode == SImode);
18425 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18426 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18427 emit_move_insn (reg, tmp);
18428 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18429 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18433 gcc_assert (mode == DImode);
18435 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18436 insns; we have to do the first in SImode, because it wraps. */
18437 if (mask2 <= 0xffffffff
18438 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18440 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18441 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18443 rtx reg_low = gen_lowpart (SImode, reg);
18444 emit_move_insn (reg_low, tmp);
18445 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18446 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18450 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18451 at the top end), rotate back and clear the other hole. */
18452 int right = exact_log2 (bit3);
18453 int left = 64 - right;
18455 /* Rotate the mask too. */
18456 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18460 rtx tmp1 = gen_reg_rtx (DImode);
18461 rtx tmp2 = gen_reg_rtx (DImode);
18462 rtx tmp3 = gen_reg_rtx (DImode);
18463 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18464 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18465 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18466 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18470 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18471 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18472 emit_move_insn (operands[0], tmp);
18473 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18474 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18475 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18479 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
18480 for lfq and stfq insns iff the registers are hard registers. */
/* Both operands must be hard FP registers with consecutive numbers.  */
18483 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18485 /* We might have been passed a SUBREG. */
18486 if (!REG_P (reg1) || !REG_P (reg2))
18489 /* We might have been passed non floating point registers. */
18490 if (!FP_REGNO_P (REGNO (reg1))
18491 || !FP_REGNO_P (REGNO (reg2)))
18494 return (REGNO (reg1) == REGNO (reg2) - 1);
18497 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18498 addr1 and addr2 must be in consecutive memory locations
18499 (addr2 == addr1 + 8). */
/* Each address must be either (reg) or (plus reg const_int); both must
   use the same base register and the offsets must differ by exactly 8.  */
18502 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18505 unsigned int reg1, reg2;
18506 int offset1, offset2;
18508 /* The mems cannot be volatile. */
18509 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18512 addr1 = XEXP (mem1, 0);
18513 addr2 = XEXP (mem2, 0);
18515 /* Extract an offset (if used) from the first addr. */
18516 if (GET_CODE (addr1) == PLUS)
18518 /* If not a REG, return zero. */
18519 if (!REG_P (XEXP (addr1, 0)))
18523 reg1 = REGNO (XEXP (addr1, 0));
18524 /* The offset must be constant! */
18525 if (!CONST_INT_P (XEXP (addr1, 1)))
18527 offset1 = INTVAL (XEXP (addr1, 1));
18530 else if (!REG_P (addr1))
18534 reg1 = REGNO (addr1);
18535 /* This was a simple (mem (reg)) expression. Offset is 0. */
18539 /* And now for the second addr. */
18540 if (GET_CODE (addr2) == PLUS)
18542 /* If not a REG, return zero. */
18543 if (!REG_P (XEXP (addr2, 0)))
18547 reg2 = REGNO (XEXP (addr2, 0));
18548 /* The offset must be constant. */
18549 if (!CONST_INT_P (XEXP (addr2, 1)))
18551 offset2 = INTVAL (XEXP (addr2, 1));
18554 else if (!REG_P (addr2))
18558 reg2 = REGNO (addr2);
18559 /* This was a simple (mem (reg)) expression. Offset is 0. */
18563 /* Both of these must have the same base register. */
18567 /* The offset for the second addr must be 8 more than the first addr. */
18568 if (offset2 != offset1 + 8)
18571 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18576 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
18577 need to use DDmode, in all other cases we can use the same mode. */
18578 static machine_mode
18579 rs6000_secondary_memory_needed_mode (machine_mode mode)
/* During LRA, SDmode values spilled to memory must be widened to
   DDmode (per the comment above).  */
18581 if (lra_in_progress && mode == SDmode)
18586 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18587 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18588 only work on the traditional altivec registers, note if an altivec register
/* REG may be a REG or SUBREG.  Returns the rs6000_reg_type for its
   (resolved) hard register, or PSEUDO_REG_TYPE/NO_REG_TYPE when that
   cannot be determined.  If IS_ALTIVEC is non-null and the register is
   a traditional Altivec register, *IS_ALTIVEC is set to true.  */
18591 static enum rs6000_reg_type
18592 register_to_reg_type (rtx reg, bool *is_altivec)
18594 HOST_WIDE_INT regno;
18595 enum reg_class rclass;
/* Strip a SUBREG wrapper.  */
18597 if (SUBREG_P (reg))
18598 reg = SUBREG_REG (reg);
18601 return NO_REG_TYPE;
18603 regno = REGNO (reg);
/* Pseudo register: before register allocation we cannot classify it;
   afterwards try true_regnum to find the assigned hard register.  */
18604 if (!HARD_REGISTER_NUM_P (regno))
18606 if (!lra_in_progress && !reload_completed)
18607 return PSEUDO_REG_TYPE;
18609 regno = true_regnum (reg);
18610 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18611 return PSEUDO_REG_TYPE;
18614 gcc_assert (regno >= 0);
18616 if (is_altivec && ALTIVEC_REGNO_P (regno))
18617 *is_altivec = true;
18619 rclass = rs6000_regno_regclass[regno];
18620 return reg_class_to_reg_type[(int)rclass];
/* Helper function to return the cost of adding a TOC entry address.  */

rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
  /* Medium/large code models form the TOC address with extra insns; charge
     more when the register class cannot use offset addressing.  */
  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;

    /* NOTE(review): -mminimal-toc presumably needs a longer load sequence
       (cost 6 vs. 3) -- confirm against the TOC expansion code.  */
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  /* TYPE names the reload reason for the debug dump; FAIL_MSG is set on the
     paths that give up and return a negative cost.  */
  const char *type = NULL;
  const char *fail_msg = NULL;

  /* Select the mask of addressing forms this MODE supports in RCLASS.  */
  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      /* If several register banks could be used, be conservative: drop the
	 update/indexed forms that not all of them support.  */
      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
      if (TARGET_DEBUG_ADDR)
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "not valid in class\n",
		 GET_MODE_NAME (mode), reg_class_names[rclass]);

  /* Dispatch on the outermost form of the address.  */
  switch (GET_CODE (addr))
    /* Does the register class supports auto update forms for this mode?  We
       don't need a scratch register, since the powerpc only supports
       PRE_INC, PRE_DEC, and PRE_MODIFY.  */
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	  fail_msg = "no base register #1";

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)

      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      /* PRE_MODIFY must be (pre_modify (reg) (plus (reg) ...)) with the same
	 base register on both sides.  */
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	  fail_msg = "bad PRE_MODIFY";

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || !CONST_INT_P (XEXP (addr, 1))
	  || INTVAL (XEXP (addr, 1)) != -16)
	  fail_msg = "bad Altivec AND #1";

      if (rclass != ALTIVEC_REGS)
	  if (legitimate_indirect_address_p (and_arg, false))

	  else if (legitimate_indexed_address_p (and_arg, false))

	      fail_msg = "bad Altivec AND #2";

      /* If this is an indirect address, make sure it is a base register.  */
      if (!legitimate_indirect_address_p (addr, false))

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	      type = "indexed #2";

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	  fail_msg = "no base register #2";

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))

      /* DQ-form (quad offset) addresses only accept offsets that pass
	 quad_address_offset_p.  */
      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	      type = "vector d-form offset";

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	      type = "offset #2";

	  fail_msg = "bad PLUS";

      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	  type = "vector d-form lo_sum";

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	  fail_msg = "bad LO_SUM";

      if ((addr_mask & RELOAD_REG_OFFSET) == 0)

      /* Static addresses need to create a TOC entry.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	  type = "vector d-form lo_sum #2";

	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);

      /* TOC references look like offsetable memory.  */
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	  fail_msg = "bad UNSPEC";

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	  type = "vector d-form lo_sum #3";

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	  type = "toc reference";

      fail_msg = "bad address";

  /* Dump the decision (and the reason) under -mdebug=addr.  */
  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
      if (extra_cost < 0)
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (type) ? type : "<none>");
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
      if (TARGET_POWERPC64)
	  /* ISA 2.07: MTVSRD or MVFVSRD.  */

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	  if (mode == SImode)

	  /* ISA 3.0 extends the 32-bit direct moves to 8/16-bit values.  */
	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register, return the
   insn code of the helper function if there is such a function or
   CODE_FOR_nothing if not.  */

rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     secondary_reload_info *sri,
  enum insn_code icode = CODE_FOR_nothing;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
      /* Handle moving 128-bit values from GPRs to VSX point registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;

      /* Handle moving 128-bit values from VSX point registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
	 bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;

  /* 64-bit SFmode moves need conversions between the register layout and
     the single precision memory layout.  */
  else if (TARGET_POWERPC64 && mode == SFmode)
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;

  else if (!TARGET_POWERPC64 && size == 8)
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;

  /* Found a helper pattern: tell reload which insn and extra cost to use.  */
  if (icode != CODE_FOR_nothing)
      sri->icode = icode;
      sri->extra_cost = cost;
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      secondary_reload_info *sri,
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))

  /* Moves to the same set of registers is a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
      /* NOTE(review): callers may pass a null SRI (see
	 rs6000_secondary_memory_needed) -- presumably these stores are
	 guarded by a null check; confirm against the full function.  */
      sri->icode = CODE_FOR_nothing;
      sri->extra_cost = 0;

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   pre-modify.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

rs6000_secondary_reload (bool in_p,
			 reg_class_t rclass_i,
			 secondary_reload_info *sri)
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  /* Pick the mode-specific reload helper pattern for a load or a store.  */
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      /* NOTE(review): presumably swapped on the store path (!in_p) so that
	 TO/FROM describe the actual direction -- confirm.  */
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
	  icode = (enum insn_code)sri->icode;

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,

      /* Non-negative means the memory helpers can fix up the address.  */
      if (extra_cost >= 0)
	  if (extra_cost > 0)
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;

  /* Likewise for multi-word values in 32-bit GPRs with offsets that are
     too large for the last word's access.  */
  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
		sri->icode = CODE_FOR_reload_si_load;
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;

  /* Fall back to the generic handling when no case above applied.  */
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  /* ALL_REGS would mean "scratch from any class", which the cases above
     should never produce.  */
  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

	fputs (", done_p not set", stderr);

	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
/* Better tracing for rs6000_secondary_reload_inner.  Dump the reload being
   performed (LINE identifies the caller's source line) as an RTL
   SET-plus-CLOBBER parallel.  */

rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  /* Build the would-be move insn (SET + scratch CLOBBER) for debug_rtx.  */
    set = gen_rtx_SET (mem, reg);

    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

/* Report an invalid secondary-reload address (tracing it first) and abort
   via gcc_unreachable -- these are internal consistency failures.  */
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.  */

rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx op_reg, op0, op1;

  /* Sanity-check the operands before touching anything.  */
  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Select the addressing forms valid for MODE in REG's register bank.  */
  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    /* Does the register class support auto update forms for this mode?  If
       not, do the update now.  We don't need a scratch register, since the
       powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	  /* Apply the increment/decrement explicitly, then address the
	     base register directly.  */
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));

      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      /* PRE_MODIFY must be (pre_modify (reg) (plus (reg) ...)) with the
	 same base register on both sides.  */
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	  /* Perform the side-effect update explicitly.  */
	  emit_insn (gen_rtx_SET (op0, op1));

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	  if (REG_P (op0) || SUBREG_P (op0))

	  else if (GET_CODE (op1) == PLUS)
	      emit_insn (gen_rtx_SET (scratch, op1));

	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  /* Emit scratch = base & -16 with a condition-code clobber, the
	     form the rs6000 AND pattern requires.  */
	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;

      /* If this is an indirect address, make sure it is a base register.  */
      if (!base_reg_operand (addr, GET_MODE (addr)))
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;

      /* DQ-form offsets are restricted; fall back to computing the full
	 address into the scratch register.  */
      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;

	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;

	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      /* Symbolic/constant addresses: load the address into the scratch
	 register and use it indirectly.  */
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;

      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");

  /* Now create the move.  */
    emit_insn (gen_rtx_SET (mem, reg));

    emit_insn (gen_rtx_SET (reg, mem));
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      fprintf (stderr, "mem:\n");
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);

  /* Only GPR reloads with a resolved hard register are expected here.  */
  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      /* For PRE_MODIFY the base register itself must receive the updated
	 address, so use it in place of the scratch register.  */
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);

  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  /* Compute the full address into a register and address it indirectly.  */
  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
    emit_insn (gen_rtx_SET (mem, reg));

    emit_insn (gen_rtx_SET (reg, mem));
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
      /* Zero is always allowed in all VSX registers.  */
      if (x == CONST0_RTX (mode))

      /* If this is a vector constant that can be formed with a few Altivec
	 instructions, we want altivec registers.  */
      if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	return ALTIVEC_REGS;

      /* If this is an integer constant that can easily be loaded into
	 vector registers, allow it.  */
      if (CONST_INT_P (x))
	  HOST_WIDE_INT value = INTVAL (x);

	  /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
	     2.06 can generate it in the Altivec registers with
	     VSPLTISW.  */
	      if (TARGET_P8_VECTOR)

	      else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		return ALTIVEC_REGS;

	  /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
	     a sign extend in the Altivec registers.  */
	  if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
	      && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
	    return ALTIVEC_REGS;

      /* Force constant to memory.  */

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

  /* Constants and address expressions need an integer-side register.  */
  if (is_constant || GET_CODE (x) == PLUS)
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))

  /* Keep integer values out of the float side when GPRs are available.  */
  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
    return GENERAL_REGS;
/* Debug version of rs6000_preferred_reload_class.  Delegates to the real
   function and traces the result to stderr.  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (GET_MODE (x)));
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.  */

rs6000_secondary_memory_needed (machine_mode mode,
				reg_class_t from_class,
				reg_class_t to_class)
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  /* A null SRI is passed since only the yes/no answer is needed here.  */
  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
/* Debug version of rs6000_secondary_memory_needed.  Delegates to the real
   function and traces the result to stderr.  */

rs6000_debug_secondary_memory_needed (machine_mode mode,
				      reg_class_t from_class,
				      reg_class_t to_class)
  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);

	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
	   "to_class = %s, mode = %s\n",
	   ret ? "true" : "false",
	   reg_class_names[from_class],
	   reg_class_names[to_class],
	   GET_MODE_NAME (mode));
19852 /* Return the register class of a scratch register needed to copy IN into
19853 or out of a register in RCLASS in MODE. If it can be done directly,
19854 NO_REGS is returned. */
19856 static enum reg_class
19857 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
/* NOTE(review): the parameter `in` (rtx) and the declaration of `regno`
   are on lines missing from this extract; `regno` is -1 when IN is not
   (or does not resolve to) a hard register -- see the regno == -1 tests
   below.  */
19862 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19864 && MACHOPIC_INDIRECT
19868 /* We cannot copy a symbolic operand directly into anything
19869 other than BASE_REGS for TARGET_ELF. So indicate that a
19870 register from BASE_REGS is needed as an intermediate
19873 On Darwin, pic addresses require a load from memory, which
19874 needs a base register. */
19875 if (rclass != BASE_REGS
19876 && (SYMBOL_REF_P (in)
19877 || GET_CODE (in) == HIGH
19878 || GET_CODE (in) == LABEL_REF
19879 || GET_CODE (in) == CONST))
/* IN is a REG: use its number only if it is a hard register.  */
19885 regno = REGNO (in);
19886 if (!HARD_REGISTER_NUM_P (regno))
19888 regno = true_regnum (in);
19889 if (!HARD_REGISTER_NUM_P (regno))
/* A SUBREG resolves through true_regnum as well.  */
19893 else if (SUBREG_P (in))
19895 regno = true_regnum (in);
19896 if (!HARD_REGISTER_NUM_P (regno))
19902 /* If we have VSX register moves, prefer moving scalar values between
19903 Altivec registers and GPR by going via an FPR (and then via memory)
19904 instead of reloading the secondary memory address for Altivec moves. */
19906 && GET_MODE_SIZE (mode) < 16
19907 && !mode_supports_vmx_dform (mode)
19908 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19909 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19910 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19911 && (regno >= 0 && INT_REGNO_P (regno)))))
19914 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19916 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19917 || (regno >= 0 && INT_REGNO_P (regno)))
19920 /* Constants, memory, and VSX registers can go into VSX registers (both the
19921 traditional floating point and the altivec registers). */
19922 if (rclass == VSX_REGS
19923 && (regno == -1 || VSX_REGNO_P (regno)))
19926 /* Constants, memory, and FP registers can go into FP registers. */
/* SDmode is special before LRA: it needs a GPR scratch.  */
19927 if ((regno == -1 || FP_REGNO_P (regno))
19928 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
19929 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19931 /* Memory, and AltiVec registers can go into AltiVec registers. */
19932 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19933 && rclass == ALTIVEC_REGS)
19936 /* We can copy among the CR registers. */
19937 if ((rclass == CR_REGS || rclass == CR0_REGS)
19938 && regno >= 0 && CR_REGNO_P (regno))
19941 /* Otherwise, we need GENERAL_REGS. */
19942 return GENERAL_REGS;
19945 /* Debug version of rs6000_secondary_reload_class. */
/* Same contract as rs6000_secondary_reload_class, plus tracing of the
   arguments, the chosen class and the input rtx.  */
19946 static enum reg_class
19947 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19948 machine_mode mode, rtx in)
19950 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19952 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19953 "mode = %s, input rtx:\n",
19954 reg_class_names[ret], reg_class_names[rclass],
19955 GET_MODE_NAME (mode));
19961 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
/* Decide whether a register in RCLASS may change from mode FROM to mode TO
   via subreg.  NOTE(review): several lines (return type, braces, some
   `return false;` statements) are absent from this extract.  */
19964 rs6000_can_change_mode_class (machine_mode from,
19966 reg_class_t rclass)
19968 unsigned from_size = GET_MODE_SIZE (from);
19969 unsigned to_size = GET_MODE_SIZE (to);
/* Size-changing subregs get extra scrutiny for FP/VSX classes.  */
19971 if (from_size != to_size)
19973 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19975 if (reg_classes_intersect_p (xclass, rclass))
19977 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
19978 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
19979 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19980 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19982 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19983 single register under VSX because the scalar part of the register
19984 is in the upper 64-bits, and not the lower 64-bits. Types like
19985 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
19986 IEEE floating point can't overlap, and neither can small
19989 if (to_float128_vector_p && from_float128_vector_p)
19992 else if (to_float128_vector_p || from_float128_vector_p)
19995 /* TDmode in floating-mode registers must always go into a register
19996 pair with the most significant word in the even-numbered register
19997 to match ISA requirements. In little-endian mode, this does not
19998 match subreg numbering, so we cannot allow subregs. */
19999 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20002 if (from_size < 8 || to_size < 8)
20005 if (from_size == 8 && (8 * to_nregs) != to_size)
20008 if (to_size == 8 && (8 * from_nregs) != from_size)
20017 /* Since the VSX register set includes traditional floating point registers
20018 and altivec registers, just check for the size being different instead of
20019 trying to check whether the modes are vector modes. Otherwise it won't
20020 allow say DF and DI to change classes. For types like TFmode and TDmode
20021 that take 2 64-bit registers, rather than a single 128-bit register, don't
20022 allow subregs of those types to other 128 bit types. */
20023 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20025 unsigned num_regs = (from_size + 15) / 16;
20026 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20027 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20030 return (from_size == 8 || from_size == 16);
/* Disallow mixing AltiVec vector and non-vector modes in ALTIVEC_REGS
   (the sum is 1 exactly when one of the two modes is AltiVec).  */
20033 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20034 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20040 /* Debug version of rs6000_can_change_mode_class. */
/* Traces arguments and result of rs6000_can_change_mode_class.  */
20042 rs6000_debug_can_change_mode_class (machine_mode from,
20044 reg_class_t rclass)
20046 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20049 "rs6000_can_change_mode_class, return %s, from = %s, "
20050 "to = %s, rclass = %s\n",
20051 ret ? "true" : "false",
20052 GET_MODE_NAME (from), GET_MODE_NAME (to),
20053 reg_class_names[rclass]);
20058 /* Return a string to do a move operation of 128 bits of data. */
/* Selects the asm template for a 128-bit move based on where the source and
   destination live (GPR / FPR / AltiVec / VSX / memory / constant).
   NOTE(review): dest_regno/src_regno declarations and some branches are on
   lines missing from this extract; regno == -1 marks "not a register".  */
20061 rs6000_output_move_128bit (rtx operands[])
20063 rtx dest = operands[0];
20064 rtx src = operands[1];
20065 machine_mode mode = GET_MODE (dest);
20068 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20069 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
/* Classify the destination register, when it is one.  */
20073 dest_regno = REGNO (dest);
20074 dest_gpr_p = INT_REGNO_P (dest_regno);
20075 dest_fp_p = FP_REGNO_P (dest_regno);
20076 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20077 dest_vsx_p = dest_fp_p | dest_vmx_p;
20082 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
/* Classify the source register, when it is one.  */
20087 src_regno = REGNO (src);
20088 src_gpr_p = INT_REGNO_P (src_regno);
20089 src_fp_p = FP_REGNO_P (src_regno);
20090 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20091 src_vsx_p = src_fp_p | src_vmx_p;
20096 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20099 /* Register moves. */
20100 if (dest_regno >= 0 && src_regno >= 0)
/* VSX -> GPR via two direct-move instructions; operand order depends on
   word endianness.  */
20107 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20108 return (WORDS_BIG_ENDIAN
20109 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20110 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20112 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20116 else if (TARGET_VSX && dest_vsx_p)
20119 return "xxlor %x0,%x1,%x1";
/* GPR -> VSX direct move.  */
20121 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20122 return (WORDS_BIG_ENDIAN
20123 ? "mtvsrdd %x0,%1,%L1"
20124 : "mtvsrdd %x0,%L1,%1")
20126 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20130 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20131 return "vor %0,%1,%1";
20133 else if (dest_fp_p && src_fp_p)
20138 /* Loads: register destination, memory source.  */
20138 else if (dest_regno >= 0 && MEM_P (src))
20142 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20148 else if (TARGET_ALTIVEC && dest_vmx_p
20149 && altivec_indexed_or_indirect_operand (src, mode))
20150 return "lvx %0,%y1";
20152 else if (TARGET_VSX && dest_vsx_p)
20154 if (mode_supports_dq_form (mode)
20155 && quad_address_p (XEXP (src, 0), mode, true))
20156 return "lxv %x0,%1";
20158 else if (TARGET_P9_VECTOR)
20159 return "lxvx %x0,%y1";
20161 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20162 return "lxvw4x %x0,%y1";
20165 return "lxvd2x %x0,%y1";
20168 else if (TARGET_ALTIVEC && dest_vmx_p)
20169 return "lvx %0,%y1";
20171 else if (dest_fp_p)
/* Stores: memory destination, register source.  */
20176 else if (src_regno >= 0 && MEM_P (dest))
20180 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20181 return "stq %1,%0";
20186 else if (TARGET_ALTIVEC && src_vmx_p
20187 && altivec_indexed_or_indirect_operand (dest, mode))
20188 return "stvx %1,%y0";
20190 else if (TARGET_VSX && src_vsx_p)
20192 if (mode_supports_dq_form (mode)
20193 && quad_address_p (XEXP (dest, 0), mode, true))
20194 return "stxv %x1,%0";
20196 else if (TARGET_P9_VECTOR)
20197 return "stxvx %x1,%y0";
20199 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20200 return "stxvw4x %x1,%y0";
20203 return "stxvd2x %x1,%y0";
20206 else if (TARGET_ALTIVEC && src_vmx_p)
20207 return "stvx %1,%y0";
/* Constant sources.  */
20214 else if (dest_regno >= 0
20215 && (CONST_INT_P (src)
20216 || CONST_WIDE_INT_P (src)
20217 || CONST_DOUBLE_P (src)
20218 || GET_CODE (src) == CONST_VECTOR))
20223 else if ((dest_vmx_p && TARGET_ALTIVEC)
20224 || (dest_vsx_p && TARGET_VSX))
20225 return output_vec_const_move (operands);
/* No template matched: this move should have been rejected earlier.  */
20228 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20231 /* Validate a 128-bit move. */
/* True when at least one operand of the 128-bit move is a GPC register.  */
20233 rs6000_move_128bit_ok_p (rtx operands[])
20235 machine_mode mode = GET_MODE (operands[0]);
20236 return (gpc_reg_operand (operands[0], mode)
20237 || gpc_reg_operand (operands[1], mode));
20240 /* Return true if a 128-bit move needs to be split. */
/* Only GPR<->GPR/memory moves after reload are split, and not those that a
   quad-memory (lq/stq) instruction can handle.  */
20242 rs6000_split_128bit_ok_p (rtx operands[])
20244 if (!reload_completed)
20247 if (!gpr_or_gpr_p (operands[0], operands[1]))
20250 if (quad_load_store_p (operands[0], operands[1]))
20257 /* Given a comparison operation, return the bit number in CCR to test. We
20258 know this is a valid comparison.
20260 SCC_P is 1 if this is for an scc. That means that %D will have been
20261 used instead of %C, so the bits will be in different places.
20263 Return -1 if OP isn't a valid comparison for some reason. */
/* Each CR field holds 4 bits (LT, GT, EQ, SO/UN); base_bit is the first bit
   of the field the comparison's CR register maps to.
   NOTE(review): the switch statement head and some case labels are on lines
   missing from this extract.  */
20266 ccr_bit (rtx op, int scc_p)
20268 enum rtx_code code = GET_CODE (op);
20269 machine_mode cc_mode;
20274 if (!COMPARISON_P (op))
20277 reg = XEXP (op, 0);
/* The comparison must test a condition register.  */
20279 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
20282 cc_mode = GET_MODE (reg);
20283 cc_regnum = REGNO (reg);
20284 base_bit = 4 * (cc_regnum - CR0_REGNO);
20286 validate_condition_mode (code, cc_mode);
20288 /* When generating a sCOND operation, only positive conditions are
20307 return scc_p ? base_bit + 3 : base_bit + 2;
20309 return base_bit + 2;
20310 case GT: case GTU: case UNLE:
20311 return base_bit + 1;
20312 case LT: case LTU: case UNGE:
20314 case ORDERED: case UNORDERED:
20315 return base_bit + 3;
20318 /* If scc, we will have done a cror to put the bit in the
20319 unordered position. So test that bit. For integer, this is ! LT
20320 unless this is an scc insn. */
20321 return scc_p ? base_bit + 3 : base_bit;
20324 return scc_p ? base_bit + 3 : base_bit + 1;
20331 /* Return the GOT register. */
/* Returns pic_offset_table_rtx, marking the PIC register live and noting
   that this function uses the PIC offset table.  VALUE is unused.  */
20334 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20336 /* The second flow pass currently (June 1999) can't update
20337 regs_ever_live without disturbing other parts of the compiler, so
20338 update it here to make the prolog/epilogue code happy. */
20339 if (!can_create_pseudo_p ()
20340 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20341 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20343 crtl->uses_pic_offset_table = 1;
20345 return pic_offset_table_rtx;
20348 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20350 /* Write out a function code label. */
/* Emit FNAME to FILE; names beginning with '.' get ABI-specific prefix
   treatment (the switch bodies are partly outside this extract).  */
20353 rs6000_output_function_entry (FILE *file, const char *fname)
20355 if (fname[0] != '.')
20357 switch (DEFAULT_ABI)
20360 gcc_unreachable ();
20366 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20376 RS6000_OUTPUT_BASENAME (file, fname);
20379 /* Print an operand. Recognize special options, documented below. */
20382 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20383 only introduced by the linker, when applying the sda21
20385 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20386 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20388 #define SMALL_DATA_RELOC "sda21"
20389 #define SMALL_DATA_REG 0
/* Implement the `%<code>' operand-printing directives used in asm templates.
   X is the operand rtx, CODE the single-letter directive.  Each case below
   documents its own letter.  NOTE(review): the switch head, many `break;`
   lines and some case labels are on lines missing from this extract.  */
20393 print_operand (FILE *file, rtx x, int code)
20396 unsigned HOST_WIDE_INT uval;
20400 /* %a is output_address. */
20402 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20406 /* Like 'J' but get to the GT bit only. */
20407 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20409 output_operand_lossage ("invalid %%D value");
20413 /* Bit 1 is GT bit. */
20414 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20416 /* Add one for shift count in rlinm for scc. */
20417 fprintf (file, "%d", i + 1);
20421 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20424 output_operand_lossage ("invalid %%e value");
20429 if ((uval & 0xffff) == 0 && uval != 0)
20434 /* X is a CR register. Print the number of the EQ bit of the CR */
20435 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20436 output_operand_lossage ("invalid %%E value");
20438 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20442 /* X is a CR register. Print the shift count needed to move it
20443 to the high-order four bits. */
20444 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20445 output_operand_lossage ("invalid %%f value");
20447 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20451 /* Similar, but print the count for the rotate in the opposite
20453 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20454 output_operand_lossage ("invalid %%F value");
20456 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20460 /* X is a constant integer. If it is negative, print "m",
20461 otherwise print "z". This is to make an aze or ame insn. */
20462 if (!CONST_INT_P (x))
20463 output_operand_lossage ("invalid %%G value");
20464 else if (INTVAL (x) >= 0)
20471 /* If constant, output low-order five bits. Otherwise, write
20474 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20476 print_operand (file, x, 0);
20480 /* If constant, output low-order six bits. Otherwise, write
20483 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20485 print_operand (file, x, 0);
20489 /* Print `i' if this is a constant, else nothing. */
20495 /* Write the bit number in CCR for jump. */
20496 i = ccr_bit (x, 0);
20498 output_operand_lossage ("invalid %%j code");
20500 fprintf (file, "%d", i);
20504 /* Similar, but add one for shift count in rlinm for scc and pass
20505 scc flag to `ccr_bit'. */
20506 i = ccr_bit (x, 1);
20508 output_operand_lossage ("invalid %%J code");
20510 /* If we want bit 31, write a shift count of zero, not 32. */
20511 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20515 /* X must be a constant. Write the 1's complement of the
20518 output_operand_lossage ("invalid %%k value");
20520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20524 /* X must be a symbolic constant on ELF. Write an
20525 expression suitable for an 'addi' that adds in the low 16
20526 bits of the MEM. */
20527 if (GET_CODE (x) == CONST)
20529 if (GET_CODE (XEXP (x, 0)) != PLUS
20530 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20531 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20532 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20533 output_operand_lossage ("invalid %%K value");
20535 print_operand_address (file, x);
20536 fputs ("@l", file);
20539 /* %l is output_asm_label. */
20542 /* Write second word of DImode or DFmode reference. Works on register
20543 or non-indexed memory only. */
20545 fputs (reg_names[REGNO (x) + 1], file);
20546 else if (MEM_P (x))
20548 machine_mode mode = GET_MODE (x);
20549 /* Handle possible auto-increment. Since it is pre-increment and
20550 we have already done it, we can just use an offset of word. */
20551 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20552 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20553 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20555 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20556 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20559 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20563 if (small_data_operand (x, GET_MODE (x)))
20564 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20565 reg_names[SMALL_DATA_REG]);
20569 case 'N': /* Unused */
20570 /* Write the number of elements in the vector times 4. */
20571 if (GET_CODE (x) != PARALLEL)
20572 output_operand_lossage ("invalid %%N value");
20574 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20577 case 'O': /* Unused */
20578 /* Similar, but subtract 1 first. */
20579 if (GET_CODE (x) != PARALLEL)
20580 output_operand_lossage ("invalid %%O value");
20582 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20586 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20589 || (i = exact_log2 (INTVAL (x))) < 0)
20590 output_operand_lossage ("invalid %%p value");
20592 fprintf (file, "%d", i);
20596 /* The operand must be an indirect memory reference. The result
20597 is the register name. */
20598 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20599 || REGNO (XEXP (x, 0)) >= 32)
20600 output_operand_lossage ("invalid %%P value");
20602 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20606 /* This outputs the logical code corresponding to a boolean
20607 expression. The expression may have one or both operands
20608 negated (if one, only the first one). For condition register
20609 logical operations, it will also treat the negated
20610 CR codes as NOTs, but not handle NOTs of them. */
20612 const char *const *t = 0;
20614 enum rtx_code code = GET_CODE (x);
/* Row = operation (AND/IOR/XOR); column = which operands are negated.  */
20615 static const char * const tbl[3][3] = {
20616 { "and", "andc", "nor" },
20617 { "or", "orc", "nand" },
20618 { "xor", "eqv", "xor" } };
20622 else if (code == IOR)
20624 else if (code == XOR)
20627 output_operand_lossage ("invalid %%q value");
20629 if (GET_CODE (XEXP (x, 0)) != NOT)
20633 if (GET_CODE (XEXP (x, 1)) == NOT)
20644 if (! TARGET_MFCRF)
20650 /* X is a CR register. Print the mask for `mtcrf'. */
20651 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20652 output_operand_lossage ("invalid %%R value");
20654 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20658 /* Low 5 bits of 32 - value */
20660 output_operand_lossage ("invalid %%s value");
20662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20666 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20667 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20669 output_operand_lossage ("invalid %%t value");
20673 /* Bit 3 is OV bit. */
20674 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20676 /* If we want bit 31, write a shift count of zero, not 32. */
20677 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20681 /* Print the symbolic name of a branch target register. */
20682 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20683 x = XVECEXP (x, 0, 0);
20684 if (!REG_P (x) || (REGNO (x) != LR_REGNO
20685 && REGNO (x) != CTR_REGNO))
20686 output_operand_lossage ("invalid %%T value");
20687 else if (REGNO (x) == LR_REGNO)
20688 fputs ("lr", file);
20690 fputs ("ctr", file);
20694 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20695 for use in unsigned operand. */
20698 output_operand_lossage ("invalid %%u value");
20703 if ((uval & 0xffff) == 0)
20706 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20710 /* High-order 16 bits of constant for use in signed operand. */
20712 output_operand_lossage ("invalid %%v value");
20714 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20715 (INTVAL (x) >> 16) & 0xffff);
20719 /* Print `u' if this has an auto-increment or auto-decrement. */
20721 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20722 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20723 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20728 /* Print the trap code for this operand. */
20729 switch (GET_CODE (x))
20732 fputs ("eq", file); /* 4 */
20735 fputs ("ne", file); /* 24 */
20738 fputs ("lt", file); /* 16 */
20741 fputs ("le", file); /* 20 */
20744 fputs ("gt", file); /* 8 */
20747 fputs ("ge", file); /* 12 */
20750 fputs ("llt", file); /* 2 */
20753 fputs ("lle", file); /* 6 */
20756 fputs ("lgt", file); /* 1 */
20759 fputs ("lge", file); /* 5 */
20762 output_operand_lossage ("invalid %%V value");
20767 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20770 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20771 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20773 print_operand (file, x, 0);
20777 /* X is a FPR or Altivec register used in a VSX context. */
20778 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
20779 output_operand_lossage ("invalid %%x value");
20782 int reg = REGNO (x);
/* VSX register numbering: FPRs map to 0-31, AltiVec regs to 32-63.  */
20783 int vsx_reg = (FP_REGNO_P (reg)
20785 : reg - FIRST_ALTIVEC_REGNO + 32);
20787 #ifdef TARGET_REGNAMES
20788 if (TARGET_REGNAMES)
20789 fprintf (file, "%%vs%d", vsx_reg);
20792 fprintf (file, "%d", vsx_reg);
20798 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20799 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20800 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20805 /* Like 'L', for third word of TImode/PTImode */
20807 fputs (reg_names[REGNO (x) + 2], file);
20808 else if (MEM_P (x))
20810 machine_mode mode = GET_MODE (x);
20811 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20812 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20813 output_address (mode, plus_constant (Pmode,
20814 XEXP (XEXP (x, 0), 0), 8));
20815 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20816 output_address (mode, plus_constant (Pmode,
20817 XEXP (XEXP (x, 0), 0), 8));
20819 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20820 if (small_data_operand (x, GET_MODE (x)))
20821 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20822 reg_names[SMALL_DATA_REG]);
20827 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20828 x = XVECEXP (x, 0, 1);
20829 /* X is a SYMBOL_REF. Write out the name preceded by a
20830 period and without any trailing data in brackets. Used for function
20831 names. If we are configured for System V (or the embedded ABI) on
20832 the PowerPC, do not emit the period, since those systems do not use
20833 TOCs and the like. */
20834 if (!SYMBOL_REF_P (x))
20836 output_operand_lossage ("invalid %%z value");
20840 /* For macho, check to see if we need a stub. */
20843 const char *name = XSTR (x, 0);
20845 if (darwin_picsymbol_stubs
20846 && MACHOPIC_INDIRECT
20847 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20848 name = machopic_indirection_name (x, /*stub_p=*/true);
20850 assemble_name (file, name);
20852 else if (!DOT_SYMBOLS)
20853 assemble_name (file, XSTR (x, 0));
20855 rs6000_output_function_entry (file, XSTR (x, 0));
20859 /* Like 'L', for last word of TImode/PTImode. */
20861 fputs (reg_names[REGNO (x) + 3], file);
20862 else if (MEM_P (x))
20864 machine_mode mode = GET_MODE (x);
20865 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20866 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20867 output_address (mode, plus_constant (Pmode,
20868 XEXP (XEXP (x, 0), 0), 12));
20869 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20870 output_address (mode, plus_constant (Pmode,
20871 XEXP (XEXP (x, 0), 0), 12));
20873 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20874 if (small_data_operand (x, GET_MODE (x)))
20875 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20876 reg_names[SMALL_DATA_REG]);
20880 /* Print AltiVec memory operand. */
20885 gcc_assert (MEM_P (x));
/* Strip the AND -16 alignment wrapper or a PRE_MODIFY, then print the
   base/index register pair.  */
20889 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20890 && GET_CODE (tmp) == AND
20891 && CONST_INT_P (XEXP (tmp, 1))
20892 && INTVAL (XEXP (tmp, 1)) == -16)
20893 tmp = XEXP (tmp, 0);
20894 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20895 && GET_CODE (tmp) == PRE_MODIFY)
20896 tmp = XEXP (tmp, 1);
20898 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20901 if (GET_CODE (tmp) != PLUS
20902 || !REG_P (XEXP (tmp, 0))
20903 || !REG_P (XEXP (tmp, 1)))
20905 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
/* If the first register is r0, swap the operand order, since r0 as the
   RA field of an indexed form means literal zero.  */
20909 if (REGNO (XEXP (tmp, 0)) == 0)
20910 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20911 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20913 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20914 reg_names[ REGNO (XEXP (tmp, 1)) ]);
/* Default (no code letter): print a register, memory operand, TOC-relative
   expression, or general constant.  */
20921 fprintf (file, "%s", reg_names[REGNO (x)]);
20922 else if (MEM_P (x))
20924 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20925 know the width from the mode. */
20926 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20927 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20928 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20929 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20930 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20931 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20932 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20933 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20935 output_address (GET_MODE (x), XEXP (x, 0));
20937 else if (toc_relative_expr_p (x, false,
20938 &tocrel_base_oac, &tocrel_offset_oac))
20939 /* This hack along with a corresponding hack in
20940 rs6000_output_addr_const_extra arranges to output addends
20941 where the assembler expects to find them. eg.
20942 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20943 without this hack would be output as "x@toc+4". We
20945 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20946 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
20947 output_addr_const (file, XVECEXP (x, 0, 0));
20948 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20949 output_addr_const (file, XVECEXP (x, 0, 1));
20951 output_addr_const (file, x);
/* '%&': name of the local-dynamic TLS base symbol.  */
20955 if (const char *name = get_some_local_dynamic_name ())
20956 assemble_name (file, name);
20958 output_operand_lossage ("'%%&' used without any "
20959 "local dynamic TLS references");
20963 output_operand_lossage ("invalid %%xn code");
20967 /* Print the address of an operand. */
/* Emit the assembly syntax for address X: register-indirect, pc-relative,
   symbolic (possibly small-data), indexed, reg+offset, LO_SUM, or
   TOC-relative forms.  NOTE(review): some lines (braces, `else` branches)
   are missing from this extract.  */
20970 print_operand_address (FILE *file, rtx x)
20973 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20975 /* Is it a pc-relative address? */
20976 else if (pcrel_address (x, Pmode))
20978 HOST_WIDE_INT offset;
20980 if (GET_CODE (x) == CONST)
20983 if (GET_CODE (x) == PLUS)
20985 offset = INTVAL (XEXP (x, 1));
20991 output_addr_const (file, x);
20994 fprintf (file, "%+" PRId64, offset);
20996 fputs ("@pcrel", file);
20998 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
20999 || GET_CODE (x) == LABEL_REF)
21001 output_addr_const (file, x);
21002 if (small_data_operand (x, GET_MODE (x)))
21003 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21004 reg_names[SMALL_DATA_REG]);
21006 gcc_assert (!TARGET_TOC);
/* Indexed form: two registers; put r0 second since r0 as RA means 0.  */
21008 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21009 && REG_P (XEXP (x, 1)))
21011 if (REGNO (XEXP (x, 0)) == 0)
21012 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21013 reg_names[ REGNO (XEXP (x, 0)) ]);
21015 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21016 reg_names[ REGNO (XEXP (x, 1)) ]);
21018 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21019 && CONST_INT_P (XEXP (x, 1)))
21020 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21021 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
/* LO_SUM: Darwin uses lo16(...)  syntax; ELF uses @l (next branch).  */
21023 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21024 && CONSTANT_P (XEXP (x, 1)))
21026 fprintf (file, "lo16(");
21027 output_addr_const (file, XEXP (x, 1));
21028 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21032 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21033 && CONSTANT_P (XEXP (x, 1)))
21035 output_addr_const (file, XEXP (x, 1));
21036 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21039 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21041 /* This hack along with a corresponding hack in
21042 rs6000_output_addr_const_extra arranges to output addends
21043 where the assembler expects to find them. eg.
21045 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21046 without this hack would be output as "x@toc+8@l(9)". We
21047 want "x+8@toc@l(9)". */
21048 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21049 if (GET_CODE (x) == LO_SUM)
21050 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21052 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21055 output_addr_const (file, x);
21058 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Handle target-specific UNSPECs inside address constants: TOC-relative
   references (with the addend-placement hack described in
   print_operand_address) and Darwin machopic offsets.  */
21061 rs6000_output_addr_const_extra (FILE *file, rtx x)
21063 if (GET_CODE (x) == UNSPEC)
21064 switch (XINT (x, 1))
21066 case UNSPEC_TOCREL:
21067 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21068 && REG_P (XVECEXP (x, 0, 1))
21069 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21070 output_addr_const (file, XVECEXP (x, 0, 0));
/* Emit the saved addend here, after the symbol, so the assembler sees
   "sym+off@toc" rather than "sym@toc+off".  */
21071 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21073 if (INTVAL (tocrel_offset_oac) >= 0)
21074 fprintf (file, "+");
21075 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21077 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21080 assemble_name (file, toc_label_name);
21083 else if (TARGET_ELF)
21084 fputs ("@toc", file);
21088 case UNSPEC_MACHOPIC_OFFSET:
21089 output_addr_const (file, XVECEXP (x, 0, 0));
21091 machopic_output_function_base_name (file);
21098 /* Target hook for assembling integer objects. The PowerPC version has
21099 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21100 is defined. It also needs to handle DI-mode objects on 64-bit
21104 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21106 #ifdef RELOCATABLE_NEEDS_FIXUP
21107 /* Special handling for SI values. */
21108 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
/* Guard against emitting a fixup for the fixup's own output.  */
21110 static int recurse = 0;
21112 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21113 the .fixup section. Since the TOC section is already relocated, we
21114 don't need to mark it here. We used to skip the text section, but it
21115 should never be valid for relocated addresses to be placed in the text
21117 if (DEFAULT_ABI == ABI_V4
21118 && (TARGET_RELOCATABLE || flag_pic > 1)
21119 && in_section != toc_section
21121 && !CONST_SCALAR_INT_P (x)
/* Emit the value with an internal label, then record that label's
   address in the .fixup section for the runtime relocator.  */
21127 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21129 ASM_OUTPUT_LABEL (asm_out_file, buf);
21130 fprintf (asm_out_file, "\t.long\t(");
21131 output_addr_const (asm_out_file, x);
21132 fprintf (asm_out_file, ")@fixup\n");
21133 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21134 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21135 fprintf (asm_out_file, "\t.long\t");
21136 assemble_name (asm_out_file, buf);
21137 fprintf (asm_out_file, "\n\t.previous\n");
21141 /* Remove initial .'s to turn a -mcall-aixdesc function
21142 address into the address of the descriptor, not the function
21144 else if (SYMBOL_REF_P (x)
21145 && XSTR (x, 0)[0] == '.'
21146 && DEFAULT_ABI == ABI_AIX)
21148 const char *name = XSTR (x, 0);
21149 while (*name == '.')
21152 fprintf (asm_out_file, "\t.long\t%s\n", name);
21156 #endif /* RELOCATABLE_NEEDS_FIXUP */
21157 return default_assemble_integer (x, size, aligned_p);
21160 /* Return a template string for assembly to emit when making an
21161 external call. FUNOP is the call mem argument operand number. */
/* SIBCALL selects "b" vs "bl" (no link for sibling calls).  Returns a
   pointer to a static buffer, so the result must be consumed before the
   next call.  */
21163 static const char *
21164 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21166 /* -Wformat-overflow workaround, without which gcc thinks that %u
21167 might produce 10 digits. */
21168 gcc_assert (funop <= MAX_RECOG_OPERANDS);
/* Build the TLS marker argument suffix, if any.  */
21172 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21174 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21175 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21176 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21177 sprintf (arg, "(%%&@tlsld)");
21179 gcc_unreachable ();
21182 /* The magic 32768 offset here corresponds to the offset of
21183 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21185 sprintf (z, "%%z%u%s", funop,
21186 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21189 static char str[32]; /* 1 spare */
21190 if (rs6000_pcrel_p (cfun))
21191 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
21192 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21193 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21194 sibcall ? "" : "\n\tnop");
21195 else if (DEFAULT_ABI == ABI_V4)
21196 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21197 flag_pic ? "@plt" : "");
21199 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
21200 else if (DEFAULT_ABI == ABI_DARWIN)
21202 /* The cookie is in operand func+2. */
21203 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT)
21204 int cookie = INTVAL (operands[funop + 2]);
21205 if (cookie & CALL_LONG)
21207 tree funname = get_identifier (XSTR (operands[funop], 0));
21208 tree labelname = get_prev_label (funname);
21209 gcc_checking_assert (labelname && !sibcall);
21211 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21212 instruction will reach 'foo', otherwise link as 'bl L42'".
21213 "L42" should be a 'branch island', that will do a far jump to
21214 'foo'. Branch islands are generated in
21215 macho_branch_islands(). */
21216 sprintf (str, "jbsr %%z%u,%.10s", funop,
21217 IDENTIFIER_POINTER (labelname));
21220 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
21222 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21226 gcc_unreachable ();
/* Public wrapper: direct (non-sibling) call template.  */
21231 rs6000_call_template (rtx *operands, unsigned int funop)
21233 return rs6000_call_template_1 (operands, funop, false);
/* Public wrapper: sibling-call (tail-call) template.  */
21237 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21239 return rs6000_call_template_1 (operands, funop, true);
21242 /* As above, for indirect calls. */
/* Build the assembler template for an indirect call (through CTR or LR),
   including the .reloc annotations that tie inline PLT sequences together
   and the "crset 2 / beq..." guard used to inhibit indirect-branch
   speculation when that mitigation is enabled.  Returns a static buffer.
   NOTE(review): elided view — the sprintf calls emitting the actual
   mnemonics are partially missing between the visible lines.  */
21244 static const char *
21245 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21248 /* -Wformat-overflow workaround, without which gcc thinks that %u
21249 might produce 10 digits. Note that -Wformat-overflow will not
21250 currently warn here for str[], so do not rely on a warning to
21251 ensure str[] is correctly sized. */
21252 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21254 /* Currently, funop is either 0 or 1. The maximum string is always
21255 a !speculate 64-bit __tls_get_addr call.
21258 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21259 . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
21261 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21262 . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
21269 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21270 . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
21272 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21273 . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
21280 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21281 . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
21283 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21284 . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
21291 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21292 . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21294 . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
21295 . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21299 static char str[160]; /* 8 spare */
21301 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21303 if (DEFAULT_ABI == ABI_AIX)
21306 ptrload, funop + 2);
21308 /* We don't need the extra code to stop indirect call speculation if
21310 bool speculate = (TARGET_MACHO
21311 || rs6000_speculate_indirect_jumps
21312 || (REG_P (operands[funop])
21313 && REGNO (operands[funop]) == LR_REGNO));
21315 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21317 const char *rel64 = TARGET_64BIT ? "64" : "";
21320 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21322 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21323 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21325 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21326 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21329 gcc_unreachable ();
21332 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
21333 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21334 && flag_pic == 2 ? "+32768" : "");
21338 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
21339 tls, rel64, notoc, funop, addend);
21340 s += sprintf (s, "crset 2\n\t");
21343 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
21344 tls, rel64, notoc, funop, addend);
21346 else if (!speculate)
21347 s += sprintf (s, "crset 2\n\t");
21349 if (rs6000_pcrel_p (cfun))
21352 sprintf (s, "b%%T%ul", funop);
21354 sprintf (s, "beq%%T%ul-", funop);
21356 else if (DEFAULT_ABI == ABI_AIX)
21362 funop, ptrload, funop + 3);
21367 funop, ptrload, funop + 3);
21369 else if (DEFAULT_ABI == ABI_ELFv2)
21375 funop, ptrload, funop + 2);
21380 funop, ptrload, funop + 2);
21387 funop, sibcall ? "" : "l");
21391 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
/* Public wrapper: indirect (non-sibling) call template.  */
21397 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21399 return rs6000_indirect_call_template_1 (operands, funop, false);
/* Public wrapper: indirect sibling-call template.  */
21403 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21405 return rs6000_indirect_call_template_1 (operands, funop, true);
21409 /* Output indirect call insns. WHICH identifies the type of sequence. */
/* Emit one step of an inline PLT call sequence (ELFv2 / SysV4 only).
   WHICH selects the step: TOC save, PLT16_HA/PLT16_LO address load,
   MTCTR, or the prefixed pcrel34 load; each step is tagged with a
   .reloc so the linker can relax the whole sequence.  Returns a static
   buffer.  NOTE(review): elided view — the switch header and several
   sprintf openings are missing between visible lines.  */
21411 rs6000_pltseq_template (rtx *operands, int which)
21413 const char *rel64 = TARGET_64BIT ? "64" : "";
21416 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21418 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
21419 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21420 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
21422 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21423 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
21426 gcc_unreachable ();
21429 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21430 static char str[96]; /* 10 spare */
21431 char off = WORDS_BIG_ENDIAN ? '2' : '4';
21432 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21433 && flag_pic == 2 ? "+32768" : "");
21436 case RS6000_PLTSEQ_TOCSAVE:
21439 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
21440 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
21443 case RS6000_PLTSEQ_PLT16_HA:
21444 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21447 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
21451 "addis %%0,%%1,0\n\t"
21452 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
21453 tls, off, rel64, addend);
21455 case RS6000_PLTSEQ_PLT16_LO:
21457 "l%s %%0,0(%%1)\n\t"
21458 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
21459 TARGET_64BIT ? "d" : "wz",
21460 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
21462 case RS6000_PLTSEQ_MTCTR:
21465 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
21466 tls, rel64, addend);
21468 case RS6000_PLTSEQ_PLT_PCREL34:
21470 "pl%s %%0,0(0),1\n\t"
21471 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
21472 TARGET_64BIT ? "d" : "wz",
21476 gcc_unreachable ();
21482 /* Helper function to return whether a MODE can do prefixed loads/stores.
21483 VOIDmode is used when we are loading the pc-relative address into a base
21484 register, but we are not using it as part of a memory operation. As modes
21485 add support for prefixed memory, they will be added here. */
/* Whether MODE may use prefixed loads/stores.  Currently only VOIDmode
   (loading a pc-relative address into a base register, no memory op);
   modes gain entries here as prefixed support is added.  */
21488 mode_supports_prefixed_address_p (machine_mode mode)
21490 return mode == VOIDmode;
21493 /* Function to return true if ADDR is a valid prefixed memory address that uses
/* Return true if ADDR is a valid prefixed memory address for MODE:
   either pc-relative, or reg+const where the offset needs 34-bit range
   or violates the DS/DQ alignment constraints of the non-prefixed form.
   NOTE(review): elided view — the early `return false/true` lines
   between the visible conditions are missing.  */
21497 rs6000_prefixed_address (rtx addr, machine_mode mode)
21499 if (!TARGET_PREFIXED_ADDR || !mode_supports_prefixed_address_p (mode))
21502 /* Check for PC-relative addresses. */
21503 if (pcrel_address (addr, Pmode))
21506 /* Check for prefixed memory addresses that have a large numeric offset,
21507 or an offset that can't be used for a DS/DQ-form memory operation. */
21508 if (GET_CODE (addr) == PLUS)
21510 rtx op0 = XEXP (addr, 0);
21511 rtx op1 = XEXP (addr, 1);
21513 if (!base_reg_operand (op0, Pmode) || !CONST_INT_P (op1))
21516 HOST_WIDE_INT value = INTVAL (op1);
21517 if (!SIGNED_34BIT_OFFSET_P (value))
21520 /* Offset larger than 16-bits? */
21521 if (!SIGNED_16BIT_OFFSET_P (value))
21524 /* DQ instruction (bottom 4 bits must be 0) for vectors. */
21525 HOST_WIDE_INT mask;
21526 if (GET_MODE_SIZE (mode) >= 16)
21529 /* DS instruction (bottom 2 bits must be 0). For 32-bit integers, we
21530 need to use DS instructions if we are sign-extending the value with
21531 LWA. For 32-bit floating point, we need DS instructions to load and
21532 store values to the traditional Altivec registers. */
21533 else if (GET_MODE_SIZE (mode) >= 4)
21536 /* QImode/HImode has no restrictions. */
21540 /* Return true if we must use a prefixed instruction. */
21541 return (value & mask) != 0;
21547 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21548 /* Emit an assembler directive to set symbol visibility for DECL to
21549 VISIBILITY_TYPE. */
/* Emit .protected/.hidden/.internal directives for DECL.  On AIX-ABI
   function descriptors, both the descriptor symbol and the dotted entry
   symbol get the directive; otherwise fall back to the default hook.  */
21552 rs6000_assemble_visibility (tree decl, int vis)
21557 /* Functions need to have their entry point symbol visibility set as
21558 well as their descriptor symbol visibility. */
21559 if (DEFAULT_ABI == ABI_AIX
21561 && TREE_CODE (decl) == FUNCTION_DECL)
21563 static const char * const visibility_types[] = {
21564 NULL, "protected", "hidden", "internal"
21567 const char *name, *type;
21569 name = ((* targetm.strip_name_encoding)
21570 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21571 type = visibility_types[vis];
21573 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21574 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21577 default_assemble_visibility (decl, vis);
/* Reverse comparison CODE for MODE.  FP comparisons (unless
   -ffinite-math-only and the code is order-insensitive) must use the
   unordered-aware reversal so NaN operands keep correct semantics.  */
21582 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21584 /* Reversal of FP compares takes care -- an ordered compare
21585 becomes an unordered compare and vice versa. */
21586 if (mode == CCFPmode
21587 && (!flag_finite_math_only
21588 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21589 || code == UNEQ || code == LTGT))
21590 return reverse_condition_maybe_unordered (code);
21592 return reverse_condition (code);
21595 /* Generate a compare for CODE. Return a brand-new rtx that
21596 represents the result of the compare. */
21599 rs6000_generate_compare (rtx cmp, machine_mode mode)
21601 machine_mode comp_mode;
21602 rtx compare_result;
21603 enum rtx_code code = GET_CODE (cmp);
21604 rtx op0 = XEXP (cmp, 0);
21605 rtx op1 = XEXP (cmp, 1);
21607 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21608 comp_mode = CCmode;
21609 else if (FLOAT_MODE_P (mode))
21610 comp_mode = CCFPmode;
21611 else if (code == GTU || code == LTU
21612 || code == GEU || code == LEU)
21613 comp_mode = CCUNSmode;
21614 else if ((code == EQ || code == NE)
21615 && unsigned_reg_p (op0)
21616 && (unsigned_reg_p (op1)
21617 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21618 /* These are unsigned values, perhaps there will be a later
21619 ordering compare that can be shared with this one. */
21620 comp_mode = CCUNSmode;
21622 comp_mode = CCmode;
21624 /* If we have an unsigned compare, make sure we don't have a signed value as
21626 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21627 && INTVAL (op1) < 0)
21629 op0 = copy_rtx_if_shared (op0);
21630 op1 = force_reg (GET_MODE (op0), op1);
21631 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21634 /* First, the compare. */
21635 compare_result = gen_reg_rtx (comp_mode);
21637 /* IEEE 128-bit support in VSX registers when we do not have hardware
21639 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21641 rtx libfunc = NULL_RTX;
21642 bool check_nan = false;
21649 libfunc = optab_libfunc (eq_optab, mode);
21654 libfunc = optab_libfunc (ge_optab, mode);
21659 libfunc = optab_libfunc (le_optab, mode);
21664 libfunc = optab_libfunc (unord_optab, mode);
21665 code = (code == UNORDERED) ? NE : EQ;
21671 libfunc = optab_libfunc (ge_optab, mode);
21672 code = (code == UNGE) ? GE : GT;
21678 libfunc = optab_libfunc (le_optab, mode);
21679 code = (code == UNLE) ? LE : LT;
21685 libfunc = optab_libfunc (eq_optab, mode);
21686 code = (code = UNEQ) ? EQ : NE;
21690 gcc_unreachable ();
21693 gcc_assert (libfunc);
21696 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21697 SImode, op0, mode, op1, mode);
21699 /* The library signals an exception for signalling NaNs, so we need to
21700 handle isgreater, etc. by first checking isordered. */
21703 rtx ne_rtx, normal_dest, unord_dest;
21704 rtx unord_func = optab_libfunc (unord_optab, mode);
21705 rtx join_label = gen_label_rtx ();
21706 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21707 rtx unord_cmp = gen_reg_rtx (comp_mode);
21710 /* Test for either value being a NaN. */
21711 gcc_assert (unord_func);
21712 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21713 SImode, op0, mode, op1, mode);
21715 /* Set value (0) if either value is a NaN, and jump to the join
21717 dest = gen_reg_rtx (SImode);
21718 emit_move_insn (dest, const1_rtx);
21719 emit_insn (gen_rtx_SET (unord_cmp,
21720 gen_rtx_COMPARE (comp_mode, unord_dest,
21723 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21724 emit_jump_insn (gen_rtx_SET (pc_rtx,
21725 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21729 /* Do the normal comparison, knowing that the values are not
21731 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21732 SImode, op0, mode, op1, mode);
21734 emit_insn (gen_cstoresi4 (dest,
21735 gen_rtx_fmt_ee (code, SImode, normal_dest,
21737 normal_dest, const0_rtx));
21739 /* Join NaN and non-Nan paths. Compare dest against 0. */
21740 emit_label (join_label);
21744 emit_insn (gen_rtx_SET (compare_result,
21745 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21750 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21751 CLOBBERs to match cmptf_internal2 pattern. */
21752 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21753 && FLOAT128_IBM_P (GET_MODE (op0))
21754 && TARGET_HARD_FLOAT)
21755 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21757 gen_rtx_SET (compare_result,
21758 gen_rtx_COMPARE (comp_mode, op0, op1)),
21759 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21760 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21761 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21762 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21763 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21764 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21765 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21766 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21767 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21768 else if (GET_CODE (op1) == UNSPEC
21769 && XINT (op1, 1) == UNSPEC_SP_TEST)
21771 rtx op1b = XVECEXP (op1, 0, 0);
21772 comp_mode = CCEQmode;
21773 compare_result = gen_reg_rtx (CCEQmode);
21775 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21777 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21780 emit_insn (gen_rtx_SET (compare_result,
21781 gen_rtx_COMPARE (comp_mode, op0, op1)));
21784 /* Some kinds of FP comparisons need an OR operation;
21785 under flag_finite_math_only we don't bother. */
21786 if (FLOAT_MODE_P (mode)
21787 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21788 && !flag_finite_math_only
21789 && (code == LE || code == GE
21790 || code == UNEQ || code == LTGT
21791 || code == UNGT || code == UNLT))
21793 enum rtx_code or1, or2;
21794 rtx or1_rtx, or2_rtx, compare2_rtx;
21795 rtx or_result = gen_reg_rtx (CCEQmode);
21799 case LE: or1 = LT; or2 = EQ; break;
21800 case GE: or1 = GT; or2 = EQ; break;
21801 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21802 case LTGT: or1 = LT; or2 = GT; break;
21803 case UNGT: or1 = UNORDERED; or2 = GT; break;
21804 case UNLT: or1 = UNORDERED; or2 = LT; break;
21805 default: gcc_unreachable ();
21807 validate_condition_mode (or1, comp_mode);
21808 validate_condition_mode (or2, comp_mode);
21809 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21810 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21811 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21812 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21814 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21816 compare_result = or_result;
21820 validate_condition_mode (code, GET_MODE (compare_result));
21822 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21826 /* Return the diagnostic message string if the binary operation OP is
21827 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Target hook: return a (translatable) diagnostic if TYPE1 and TYPE2 may
   not be mixed in binary operation OP (IEEE __float128 vs IBM __ibm128
   vs long double, unless -mfloat128-convert); NULL when allowed.
   NOTE(review): elided view — the NULL-return lines are missing.  */
21830 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21834 machine_mode mode1 = TYPE_MODE (type1);
21835 machine_mode mode2 = TYPE_MODE (type2);
21837 /* For complex modes, use the inner type. */
21838 if (COMPLEX_MODE_P (mode1))
21839 mode1 = GET_MODE_INNER (mode1);
21841 if (COMPLEX_MODE_P (mode2))
21842 mode2 = GET_MODE_INNER (mode2);
21844 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21845 double to intermix unless -mfloat128-convert. */
21846 if (mode1 == mode2)
21849 if (!TARGET_FLOAT128_CVT)
21851 if ((mode1 == KFmode && mode2 == IFmode)
21852 || (mode1 == IFmode && mode2 == KFmode))
21853 return N_("__float128 and __ibm128 cannot be used in the same "
21856 if (TARGET_IEEEQUAD
21857 && ((mode1 == IFmode && mode2 == TFmode)
21858 || (mode1 == TFmode && mode2 == IFmode)))
21859 return N_("__ibm128 and long double cannot be used in the same "
21862 if (!TARGET_IEEEQUAD
21863 && ((mode1 == KFmode && mode2 == TFmode)
21864 || (mode1 == TFmode && mode2 == KFmode)))
21865 return N_("__float128 and long double cannot be used in the same "
21873 /* Expand floating point conversion to/from __float128 and __ibm128. */
/* Expand a conversion to/from KFmode/TFmode/IFmode.  Tries, in order:
   a plain FLOAT_EXTEND move between the 128-bit formats, an ISA 3.0
   hardware conversion from the hw_conversions table, or a libcall via
   the selected convert optab.  UNSIGNED_P picks the signed/unsigned
   integer variants.  NOTE(review): elided view — the switch statements
   over src/dest modes are mostly missing between visible lines.  */
21876 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21878 machine_mode dest_mode = GET_MODE (dest);
21879 machine_mode src_mode = GET_MODE (src);
21880 convert_optab cvt = unknown_optab;
21881 bool do_move = false;
21882 rtx libfunc = NULL_RTX;
21884 typedef rtx (*rtx_2func_t) (rtx, rtx);
21885 rtx_2func_t hw_convert = (rtx_2func_t)0;
21889 rtx_2func_t from_df;
21890 rtx_2func_t from_sf;
21891 rtx_2func_t from_si_sign;
21892 rtx_2func_t from_si_uns;
21893 rtx_2func_t from_di_sign;
21894 rtx_2func_t from_di_uns;
21897 rtx_2func_t to_si_sign;
21898 rtx_2func_t to_si_uns;
21899 rtx_2func_t to_di_sign;
21900 rtx_2func_t to_di_uns;
21901 } hw_conversions[2] = {
21902 /* convertions to/from KFmode */
21904 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21905 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21906 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21907 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21908 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21909 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21910 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21911 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21912 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21913 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21914 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21915 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21918 /* convertions to/from TFmode */
21920 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21921 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21922 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21923 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21924 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21925 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21926 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21927 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21928 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21929 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21930 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21931 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21935 if (dest_mode == src_mode)
21936 gcc_unreachable ();
21938 /* Eliminate memory operations. */
21940 src = force_reg (src_mode, src);
21944 rtx tmp = gen_reg_rtx (dest_mode);
21945 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21946 rs6000_emit_move (dest, tmp, dest_mode);
21950 /* Convert to IEEE 128-bit floating point. */
21951 if (FLOAT128_IEEE_P (dest_mode))
21953 if (dest_mode == KFmode)
21955 else if (dest_mode == TFmode)
21958 gcc_unreachable ();
21964 hw_convert = hw_conversions[kf_or_tf].from_df;
21969 hw_convert = hw_conversions[kf_or_tf].from_sf;
21975 if (FLOAT128_IBM_P (src_mode))
21984 cvt = ufloat_optab;
21985 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21989 cvt = sfloat_optab;
21990 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21997 cvt = ufloat_optab;
21998 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22002 cvt = sfloat_optab;
22003 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22008 gcc_unreachable ();
22012 /* Convert from IEEE 128-bit floating point. */
22013 else if (FLOAT128_IEEE_P (src_mode))
22015 if (src_mode == KFmode)
22017 else if (src_mode == TFmode)
22020 gcc_unreachable ();
22026 hw_convert = hw_conversions[kf_or_tf].to_df;
22031 hw_convert = hw_conversions[kf_or_tf].to_sf;
22037 if (FLOAT128_IBM_P (dest_mode))
22047 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22052 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22060 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22065 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22070 gcc_unreachable ();
22074 /* Both IBM format. */
22075 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22079 gcc_unreachable ();
22081 /* Handle conversion between TFmode/KFmode/IFmode. */
22083 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22085 /* Handle conversion if we have hardware support. */
22086 else if (TARGET_FLOAT128_HW && hw_convert)
22087 emit_insn ((hw_convert) (dest, src));
22089 /* Call an external function to do the conversion. */
22090 else if (cvt != unknown_optab)
22092 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22093 gcc_assert (libfunc != NULL_RTX);
22095 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22098 gcc_assert (dest2 != NULL_RTX);
22099 if (!rtx_equal_p (dest, dest2))
22100 emit_move_insn (dest, dest2);
22104 gcc_unreachable ();
22110 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22111 can be used as that dest register. Return the dest register. */
/* Emit RTL computing a value that is zero iff OP1 == OP2 (via XOR when
   OP2 is a logical-operand constant, otherwise via subtraction), using
   SCRATCH as the destination when it is a real register.  Returns the
   register holding the result (OP1 itself when OP2 is zero — the
   elided return on the const0_rtx path is not visible here).  */
22114 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22116 if (op2 == const0_rtx)
22119 if (GET_CODE (scratch) == SCRATCH)
22120 scratch = gen_reg_rtx (mode);
22122 if (logical_operand (op2, mode))
22123 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22125 emit_insn (gen_rtx_SET (scratch,
22126 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
/* Expand a setcc (store-condition) operation: generate the compare,
   rewrite conditions PowerPC cannot store directly (NE, GE, LE, ...)
   as the EQ of a reversed CCEQ compare, then move the condition
   register bit into the result.  */
22132 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22135 machine_mode op_mode;
22136 enum rtx_code cond_code;
22137 rtx result = operands[0];
22139 condition_rtx = rs6000_generate_compare (operands[1], mode);
22140 cond_code = GET_CODE (condition_rtx);
22142 if (cond_code == NE
22143 || cond_code == GE || cond_code == LE
22144 || cond_code == GEU || cond_code == LEU
22145 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22147 rtx not_result = gen_reg_rtx (CCEQmode);
22148 rtx not_op, rev_cond_rtx;
22149 machine_mode cc_mode;
22151 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22153 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22154 SImode, XEXP (condition_rtx, 0), const0_rtx);
22155 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22156 emit_insn (gen_rtx_SET (not_result, not_op));
22157 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22160 op_mode = GET_MODE (XEXP (operands[1], 0));
22161 if (op_mode == VOIDmode)
22162 op_mode = GET_MODE (XEXP (operands[1], 1));
22164 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22166 PUT_MODE (condition_rtx, DImode);
22167 convert_move (result, condition_rtx, 0);
22171 PUT_MODE (condition_rtx, SImode);
22172 emit_insn (gen_rtx_SET (result, condition_rtx));
22176 /* Emit a branch of kind CODE to location LOC. */
/* Expand a conditional branch: generate the compare for operands[0]
   and emit a jump to the label in operands[3] guarded by it.  */
22179 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22181 rtx condition_rtx, loc_ref;
22183 condition_rtx = rs6000_generate_compare (operands[0], mode);
22184 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22185 emit_jump_insn (gen_rtx_SET (pc_rtx,
22186 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22187 loc_ref, pc_rtx)));
22190 /* Return the string to output a conditional branch to LABEL, which is
22191 the operand template of the label, or NULL if the branch is really a
22192 conditional return.
22194 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22195 condition code register and its mode specifies what kind of
22196 comparison we made.
22198 REVERSED is nonzero if we should reverse the sense of the comparison.
22200 INSN is the insn. */
/* Produce the assembler text for a conditional branch (or conditional
   return when LABEL is NULL).  Picks the bc mnemonic from the rtx code,
   appends a +/- static prediction hint from the REG_BR_PROB note, and
   rewrites out-of-range branches (get_attr_length == 8) as an inverted
   short branch over an unconditional "b LABEL".  Returns a static
   buffer.  NOTE(review): elided view — pred initialization and parts of
   the sprintf sequence are missing between visible lines.  */
22203 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22205 static char string[64];
22206 enum rtx_code code = GET_CODE (op);
22207 rtx cc_reg = XEXP (op, 0);
22208 machine_mode mode = GET_MODE (cc_reg);
22209 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22210 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22211 int really_reversed = reversed ^ need_longbranch;
22217 validate_condition_mode (code, mode);
22219 /* Work out which way this really branches. We could use
22220 reverse_condition_maybe_unordered here always but this
22221 makes the resulting assembler clearer. */
22222 if (really_reversed)
22224 /* Reversal of FP compares takes care -- an ordered compare
22225 becomes an unordered compare and vice versa. */
22226 if (mode == CCFPmode)
22227 code = reverse_condition_maybe_unordered (code);
22229 code = reverse_condition (code);
22234 /* Not all of these are actually distinct opcodes, but
22235 we distinguish them for clarity of the resulting assembler. */
22236 case NE: case LTGT:
22237 ccode = "ne"; break;
22238 case EQ: case UNEQ:
22239 ccode = "eq"; break;
22241 ccode = "ge"; break;
22242 case GT: case GTU: case UNGT:
22243 ccode = "gt"; break;
22245 ccode = "le"; break;
22246 case LT: case LTU: case UNLT:
22247 ccode = "lt"; break;
22248 case UNORDERED: ccode = "un"; break;
22249 case ORDERED: ccode = "nu"; break;
22250 case UNGE: ccode = "nl"; break;
22251 case UNLE: ccode = "ng"; break;
22253 gcc_unreachable ();
22256 /* Maybe we have a guess as to how likely the branch is. */
22258 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22259 if (note != NULL_RTX)
22261 /* PROB is the difference from 50%. */
22262 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22263 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22265 /* Only hint for highly probable/improbable branches on newer cpus when
22266 we have real profile data, as static prediction overrides processor
22267 dynamic prediction. For older cpus we may as well always hint, but
22268 assume not taken for branches that are very close to 50% as a
22269 mispredicted taken branch is more expensive than a
22270 mispredicted not-taken branch. */
22271 if (rs6000_always_hint
22272 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22273 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22274 && br_prob_note_reliable_p (note)))
22276 if (abs (prob) > REG_BR_PROB_BASE / 20
22277 && ((prob > 0) ^ need_longbranch))
22285 s += sprintf (s, "b%slr%s ", ccode, pred);
22287 s += sprintf (s, "b%s%s ", ccode, pred);
22289 /* We need to escape any '%' characters in the reg_names string.
22290 Assume they'd only be the first character.... */
22291 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22293 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22297 /* If the branch distance was too far, we may have to use an
22298 unconditional branch to go the distance. */
22299 if (need_longbranch)
22300 s += sprintf (s, ",$+8\n\tb %s", label);
22302 s += sprintf (s, ",%s", label);
22308 /* Return insn for VSX or Altivec comparisons. */
/* Emit a single VSX/Altivec vector compare CODE of OP0/OP1 when the
   hardware supports it directly; the supported-code check between the
   visible lines is elided here.  Returns the mask register.  */
22311 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22314 machine_mode mode = GET_MODE (op0);
22322 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22333 mask = gen_reg_rtx (mode);
22334 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22341 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22342 DMODE is expected destination mode. This is a recursive function. */
/* Emit a vector compare RCODE of OP0/OP1 producing a DMODE mask,
   recursively falling back to: operand swap, inverted compare plus
   one's-complement (A != B as ~(A == B)), or GT/LT composed with EQ via
   IOR.  The recursion is bounded ("You only get two chances").
   NOTE(review): elided view — the switch over rcode and several return
   statements are missing between the visible lines.  */
22345 rs6000_emit_vector_compare (enum rtx_code rcode,
22347 machine_mode dmode)
22350 bool swap_operands = false;
22351 bool try_again = false;
22353 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22354 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22356 /* See if the comparison works as is. */
22357 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22365 swap_operands = true;
22370 swap_operands = true;
22378 /* Invert condition and try again.
22379 e.g., A != B becomes ~(A==B). */
22381 enum rtx_code rev_code;
22382 enum insn_code nor_code;
22385 rev_code = reverse_condition_maybe_unordered (rcode);
22386 if (rev_code == UNKNOWN)
22389 nor_code = optab_handler (one_cmpl_optab, dmode);
22390 if (nor_code == CODE_FOR_nothing)
22393 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22397 mask = gen_reg_rtx (dmode);
22398 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22406 /* Try GT/GTU/LT/LTU OR EQ */
22409 enum insn_code ior_code;
22410 enum rtx_code new_code;
22431 gcc_unreachable ();
22434 ior_code = optab_handler (ior_optab, dmode);
22435 if (ior_code == CODE_FOR_nothing)
22438 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22442 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22446 mask = gen_reg_rtx (dmode);
22447 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22458 std::swap (op0, op1);
22460 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22465 /* You only get two chances. */
22469 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22470 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22471 operands for the relation operation COND. */
/* Expand a VEC_COND_EXPR: DEST = (CC_OP0 COND CC_OP1) ? OP_TRUE : OP_FALSE.
   Builds a compare mask, normalizes awkward conditions by inverting and
   swapping the arms, special-cases all-ones/all-zeros arms so the mask
   itself (or its NOT) is the answer, and otherwise emits a vector select.
   NOTE(review): elided view — the switch on rcode and early returns are
   missing between visible lines.  */
22474 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22475 rtx cond, rtx cc_op0, rtx cc_op1)
22477 machine_mode dest_mode = GET_MODE (dest);
22478 machine_mode mask_mode = GET_MODE (cc_op0);
22479 enum rtx_code rcode = GET_CODE (cond);
22480 machine_mode cc_mode = CCmode;
22483 bool invert_move = false;
22485 if (VECTOR_UNIT_NONE_P (dest_mode))
22488 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22489 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22493 /* Swap operands if we can, and fall back to doing the operation as
22494 specified, and doing a NOR to invert the test. */
22500 /* Invert condition and try again.
22501 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22502 invert_move = true;
22503 rcode = reverse_condition_maybe_unordered (rcode);
22504 if (rcode == UNKNOWN)
22510 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22512 /* Invert condition to avoid compound test. */
22513 invert_move = true;
22514 rcode = reverse_condition (rcode);
22522 /* Mark unsigned tests with CCUNSmode. */
22523 cc_mode = CCUNSmode;
22525 /* Invert condition to avoid compound test if necessary. */
22526 if (rcode == GEU || rcode == LEU)
22528 invert_move = true;
22529 rcode = reverse_condition (rcode);
22537 /* Get the vector mask for the given relational operations. */
22538 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22544 std::swap (op_true, op_false);
22546 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
22547 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22548 && (GET_CODE (op_true) == CONST_VECTOR
22549 || GET_CODE (op_false) == CONST_VECTOR))
22551 rtx constant_0 = CONST0_RTX (dest_mode);
22552 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22554 if (op_true == constant_m1 && op_false == constant_0)
22556 emit_move_insn (dest, mask);
22560 else if (op_true == constant_0 && op_false == constant_m1)
22562 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22566 /* If we can't use the vector comparison directly, perhaps we can use
22567 the mask for the true or false fields, instead of loading up a
22569 if (op_true == constant_m1)
22572 if (op_false == constant_0)
22576 if (!REG_P (op_true) && !SUBREG_P (op_true))
22577 op_true = force_reg (dest_mode, op_true);
22579 if (!REG_P (op_false) && !SUBREG_P (op_false))
22580 op_false = force_reg (dest_mode, op_false);
22582 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22583 CONST0_RTX (dest_mode));
22584 emit_insn (gen_rtx_SET (dest,
22585 gen_rtx_IF_THEN_ELSE (dest_mode,
22592 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22593 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
22594 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22595 hardware has no such operation. */
/* NOTE(review): this extract has dropped physical lines (embedded line
   numbers are non-contiguous), so the return type, braces, and the bodies
   of several conditionals are not visible here.  Comments below describe
   only what the visible lines establish.  */
22598 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22600 enum rtx_code code = GET_CODE (op);
22601 rtx op0 = XEXP (op, 0);
22602 rtx op1 = XEXP (op, 1);
22603 machine_mode compare_mode = GET_MODE (op0);
22604 machine_mode result_mode = GET_MODE (dest);
/* Whether to emit a max (vs. a min); set from the comparison code below.  */
22605 bool max_p = false;
/* The compare and the select must be in the same mode for this subcase;
   the body taken when they differ is not visible in this extract
   (presumably an early "return 0" — TODO confirm against full source).  */
22607 if (result_mode != compare_mode)
22610 if (code == GE || code == GT)
22612 else if (code == LE || code == LT)
/* (a OP b ? a : b) maps directly to min/max; the swapped-operand form
   inverts max_p (bodies not visible in this extract).  */
22617 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22620 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22626 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22630 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22631 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
22632 operands of the last comparison is nonzero/true, FALSE_COND if it is
22633 zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): lossy extract — declarations of compare_rtx/cmove_rtx/
   clobber_rtx and several statement bodies were dropped between the
   visible lines.  */
22636 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22638 enum rtx_code code = GET_CODE (op);
22639 rtx op0 = XEXP (op, 0);
22640 rtx op1 = XEXP (op, 1);
22641 machine_mode result_mode = GET_MODE (dest);
/* This path needs fresh pseudos; bail out when they cannot be created
   (body of the guard not visible here).  */
22646 if (!can_create_pseudo_p ())
/* Canonicalize the comparison by swapping operands (the enclosing
   condition selecting when to do so is not visible in this extract).  */
22659 code = swap_condition (code);
22660 std::swap (op0, op1);
22667 /* Generate: [(parallel [(set (dest)
22668 (if_then_else (op (cmp1) (cmp2))
22671 (clobber (scratch))])]. */
22673 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22674 cmove_rtx = gen_rtx_SET (dest,
22675 gen_rtx_IF_THEN_ELSE (result_mode,
/* The XSCMP*DP/XXSEL pattern needs a V2DI scratch for the mask.  */
22680 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22681 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22682 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22687 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
22688 operands of the last comparison is nonzero/true, FALSE_COND if it
22689 is zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): lossy extract — return type, braces, many return
   statements, and the switch skeleton of the GE-reduction below were
   dropped; comments describe only the visible lines.  */
22692 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22694 enum rtx_code code = GET_CODE (op);
22695 rtx op0 = XEXP (op, 0);
22696 rtx op1 = XEXP (op, 1);
22697 machine_mode compare_mode = GET_MODE (op0);
22698 machine_mode result_mode = GET_MODE (dest);
22700 bool is_against_zero;
22702 /* These modes should always match. */
22703 if (GET_MODE (op1) != compare_mode
22704 /* In the isel case however, we can use a compare immediate, so
22705 op1 may be a small constant. */
22706 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22708 if (GET_MODE (true_cond) != result_mode)
22710 if (GET_MODE (false_cond) != result_mode)
22713 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22714 if (TARGET_P9_MINMAX
22715 && (compare_mode == SFmode || compare_mode == DFmode)
22716 && (result_mode == SFmode || result_mode == DFmode))
/* Try the cheap xsmaxcdp/xsmincdp form first, then the general
   xscmp*dp/xxsel conditional move.  */
22718 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22721 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22725 /* Don't allow using floating point comparisons for integer results for
22727 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22730 /* First, work out if the hardware can do this at all, or
22731 if it's too slow.... */
22732 if (!FLOAT_MODE_P (compare_mode))
/* Integer compare: delegate to the isel-based expander.  */
22735 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22739 is_against_zero = op1 == CONST0_RTX (compare_mode);
22741 /* A floating-point subtract might overflow, underflow, or produce
22742 an inexact result, thus changing the floating-point flags, so it
22743 can't be generated if we care about that. It's safe if one side
22744 of the construct is zero, since then no subtract will be
22746 if (SCALAR_FLOAT_MODE_P (compare_mode)
22747 && flag_trapping_math && ! is_against_zero)
22750 /* Eliminate half of the comparisons by switching operands, this
22751 makes the remaining code simpler. */
22752 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22753 || code == LTGT || code == LT || code == UNLE)
22755 code = reverse_condition_maybe_unordered (code);
/* Swap the arms to match the reversed condition (the companion
   assignment to false_cond is not visible in this extract).  */
22757 true_cond = false_cond;
22761 /* UNEQ and LTGT take four instructions for a comparison with zero,
22762 it'll probably be faster to use a branch here too. */
22763 if (code == UNEQ && HONOR_NANS (compare_mode))
22766 /* We're going to try to implement comparisons by performing
22767 a subtract, then comparing against zero. Unfortunately,
22768 Inf - Inf is NaN which is not zero, and so if we don't
22769 know that the operand is finite and the comparison
22770 would treat EQ different to UNORDERED, we can't do it. */
22771 if (HONOR_INFINITIES (compare_mode)
22772 && code != GT && code != UNGE
22773 && (!CONST_DOUBLE_P (op1)
22774 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22775 /* Constructs of the form (a OP b ? a : b) are safe. */
22776 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22777 || (! rtx_equal_p (op0, true_cond)
22778 && ! rtx_equal_p (op1, true_cond))))
22781 /* At this point we know we can use fsel. */
22783 /* Reduce the comparison to a comparison against zero. */
22784 if (! is_against_zero)
22786 temp = gen_reg_rtx (compare_mode);
22787 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22789 op1 = CONST0_RTX (compare_mode);
22792 /* If we don't care about NaNs we can reduce some of the comparisons
22793 down to faster ones. */
22794 if (! HONOR_NANS (compare_mode))
22800 true_cond = false_cond;
22813 /* Now, reduce everything down to a GE. */
/* The switch over CODE that drives the following cases was dropped by
   the extraction; each arm rewrites op0 so only GE-against-zero
   remains for the final fsel-style if_then_else.  */
22820 temp = gen_reg_rtx (compare_mode);
22821 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22826 temp = gen_reg_rtx (compare_mode);
22827 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22832 temp = gen_reg_rtx (compare_mode);
22833 emit_insn (gen_rtx_SET (temp,
22834 gen_rtx_NEG (compare_mode,
22835 gen_rtx_ABS (compare_mode, op0))));
22840 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22841 temp = gen_reg_rtx (result_mode)
;
22842 emit_insn (gen_rtx_SET (temp,
22843 gen_rtx_IF_THEN_ELSE (result_mode,
22844 gen_rtx_GE (VOIDmode,
22846 true_cond, false_cond)));
22847 false_cond = true_cond;
22850 temp = gen_reg_rtx (compare_mode);
22851 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22856 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22857 temp = gen_reg_rtx (result_mode);
22858 emit_insn (gen_rtx_SET (temp,
22859 gen_rtx_IF_THEN_ELSE (result_mode,
22860 gen_rtx_GE (VOIDmode,
22862 true_cond, false_cond)));
22863 true_cond = false_cond;
22866 temp = gen_reg_rtx (compare_mode);
22867 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22872 gcc_unreachable ();
/* Final select: DEST = (op0 GE 0) ? true_cond : false_cond.  */
22875 emit_insn (gen_rtx_SET (dest,
22876 gen_rtx_IF_THEN_ELSE (result_mode,
22877 gen_rtx_GE (VOIDmode,
22879 true_cond, false_cond)));
22883 /* Same as above, but for ints (isel). */
/* NOTE(review): lossy extract — return type, braces, the signedp
   declaration, and some switch labels were dropped between the visible
   lines.  */
22886 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22888 rtx condition_rtx, cr;
22889 machine_mode mode = GET_MODE (dest);
22890 enum rtx_code cond_code;
/* Generator for the isel pattern, chosen below by mode/signedness.  */
22891 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
/* isel only handles SImode, and DImode on 64-bit.  */
22894 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22897 /* We still have to do the compare, because isel doesn't do a
22898 compare, it just looks at the CRx bits set by a previous compare
22900 condition_rtx = rs6000_generate_compare (op, mode);
22901 cond_code = GET_CODE (condition_rtx);
22902 cr = XEXP (condition_rtx, 0);
/* CCmode means a signed compare; CCUNSmode would mean unsigned.  */
22903 signedp = GET_MODE (cr) == CCmode;
22905 isel_func = (mode == SImode
22906 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22907 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22911 case LT: case GT: case LTU: case GTU: case EQ:
22912 /* isel handles these directly. */
22916 /* We need to swap the sense of the comparison. */
22918 std::swap (false_cond, true_cond);
22919 PUT_CODE (condition_rtx, reverse_condition (cond_code));
/* isel operands must be registers or 0; force into registers.  */
22924 false_cond = force_reg (mode, false_cond);
22925 if (true_cond != const0_rtx)
22926 true_cond = force_reg (mode, true_cond);
22928 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
/* Emit DEST = min/max (OP0, OP1) per CODE, either with a direct
   vector/scalar min-max insn or via a conditional move.
   NOTE(review): lossy extract — the function's header comment, return
   type, braces and the declaration of `c`/`target` were dropped.  */
22934 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22936 machine_mode mode = GET_MODE (op0);
22940 /* VSX/altivec have direct min/max insns. */
22941 if ((code == SMAX || code == SMIN)
22942 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22943 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22945 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
/* Otherwise synthesize via a conditional move; `c` is presumably the
   comparison code (GE/GEU) chosen here — TODO confirm, the assignment
   was dropped by the extraction.  */
22949 if (code == SMAX || code == SMIN)
22954 if (code == SMAX || code == UMAX)
22955 target = emit_conditional_move (dest, c, op0, op1, mode,
22956 op0, op1, mode, 0);
22958 target = emit_conditional_move (dest, c, op0, op1, mode,
22959 op1, op0, mode, 0);
/* emit_conditional_move may fail; this target must support it.  */
22960 gcc_assert (target);
22961 if (target != dest)
22962 emit_move_insn (dest, target);
22965 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22966 COND is true. Mark the jump as unlikely to be taken. */
22969 emit_unlikely_jump (rtx cond, rtx label)
/* (if_then_else cond (label_ref) (pc)) — a conditional branch.  */
22971 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22972 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
/* Attach a branch-probability note so the branch is laid out cold.  */
22973 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22976 /* A subroutine of the atomic operation splitters. Emit a load-locked
22977 instruction in MODE. For QI/HImode, possibly use a pattern than includes
22978 the zero_extend operation. */
22981 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22983 rtx (*fn) (rtx, rtx) = NULL;
/* Select the lbarx/lharx/lwarx/ldarx/lqarx generator by mode; the
   switch labels themselves were dropped by this extraction.  */
22988 fn = gen_load_lockedqi;
22991 fn = gen_load_lockedhi;
/* SImode result from a narrower memory uses the zero-extending form.  */
22994 if (GET_MODE (mem) == QImode)
22995 fn = gen_load_lockedqi_si;
22996 else if (GET_MODE (mem) == HImode)
22997 fn = gen_load_lockedhi_si;
22999 fn = gen_load_lockedsi;
23002 fn = gen_load_lockeddi;
23005 fn = gen_load_lockedti;
23008 gcc_unreachable ();
23010 emit_insn (fn (reg, mem));
23013 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23014 instruction in MODE. */
23017 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23019 rtx (*fn) (rtx, rtx, rtx) = NULL;
/* Select stbcx./sthcx./stwcx./stdcx./stqcx. by mode; the switch labels
   were dropped by this extraction.  */
23024 fn = gen_store_conditionalqi;
23027 fn = gen_store_conditionalhi;
23030 fn = gen_store_conditionalsi;
23033 fn = gen_store_conditionaldi;
23036 fn = gen_store_conditionalti;
23039 gcc_unreachable ();
23042 /* Emit sync before stwcx. to address PPC405 Erratum. */
23043 if (PPC405_ERRATUM77)
23044 emit_insn (gen_hwsync ());
/* RES receives the CR result of the store-conditional.  */
23046 emit_insn (fn (res, mem, val));
23049 /* Expand barriers before and after a load_locked/store_cond sequence. */
23052 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23054 rtx addr = XEXP (mem, 0);
/* larx/stcx. require a reg or reg+reg address; legitimize otherwise.  */
23056 if (!legitimate_indirect_address_p (addr, reload_completed)
23057 && !legitimate_indexed_address_p (addr, reload_completed)
)
23059 addr = force_reg (Pmode, addr);
23060 mem = replace_equiv_address_nv (mem, addr);
/* Pre-barrier per memory model: none for relaxed/consume/acquire,
   lwsync for release/acq_rel, hwsync for seq_cst.  */
23065 case MEMMODEL_RELAXED:
23066 case MEMMODEL_CONSUME:
23067 case MEMMODEL_ACQUIRE:
23069 case MEMMODEL_RELEASE:
23070 case MEMMODEL_ACQ_REL:
23071 emit_insn (gen_lwsync ());
23073 case MEMMODEL_SEQ_CST:
23074 emit_insn (gen_hwsync ());
23077 gcc_unreachable ();
/* Emit the barrier required AFTER an atomic sequence for MODEL:
   nothing for relaxed/consume/release, isync for any model with
   acquire semantics.  (Function header/braces dropped by extraction.)  */
23083 rs6000_post_atomic_barrier (enum memmodel model)
23087 case MEMMODEL_RELAXED:
23088 case MEMMODEL_CONSUME:
23089 case MEMMODEL_RELEASE:
23091 case MEMMODEL_ACQUIRE:
23092 case MEMMODEL_ACQ_REL:
23093 case MEMMODEL_SEQ_CST:
23094 emit_insn (gen_isync ());
23097 gcc_unreachable ();
23101 /* A subroutine of the various atomic expanders. For sub-word operations,
23102 we must adjust things to operate on SImode. Given the original MEM,
23103 return a new aligned memory. Also build and return the quantities by
23104 which to shift and mask. */
23107 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23109 rtx addr, align, shift, mask, mem;
23110 HOST_WIDE_INT shift_mask;
23111 machine_mode mode = GET_MODE (orig_mem);
23113 /* For smaller modes, we have to implement this via SImode. */
/* Max bit-shift of the subword within the SImode word:
   0x18 (24) for bytes, 0x10 (16) for halfwords.  */
23114 shift_mask = (mode == QImode ? 0x18 : 0x10);
23116 addr = XEXP (orig_mem, 0);
23117 addr = force_reg (GET_MODE (addr), addr);
23119 /* Aligned memory containing subword. Generate a new memory. We
23120 do not want any of the existing MEM_ATTR data, as we're now
23121 accessing memory outside the original object. */
23122 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23123 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23124 mem = gen_rtx_MEM (SImode, align);
23125 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23126 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23127 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23129 /* Shift amount for subword relative to aligned word. */
23130 shift = gen_reg_rtx (SImode);
23131 addr = gen_lowpart (SImode, addr);
23132 rtx tmp = gen_reg_rtx (SImode);
/* shift = (addr * 8) & shift_mask — byte offset scaled to bits.  */
23133 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23134 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
/* Big-endian: the subword sits at the opposite end of the word.  */
23135 if (BYTES_BIG_ENDIAN)
23136 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23137 shift, 1, OPTAB_LIB_WIDEN);
23140 /* Mask for insertion. */
23141 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23142 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23148 /* A subroutine of the various atomic expanders. For sub-word operands,
23149 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
23152 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
/* Result = (oldval & ~mask) | newval; NEWVAL is assumed already shifted
   into position and limited to the MASK field by the callers.  */
23156 x = gen_reg_rtx (SImode);
23157 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23158 gen_rtx_NOT (SImode, mask),
23161 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23166 /* A subroutine of the various atomic expanders. For sub-word operands,
23167 extract WIDE to NARROW via SHIFT. */
23170 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
/* Shift the field down to bit 0, then take the low part in NARROW's mode.  */
23172 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23173 wide, 1, OPTAB_LIB_WIDEN);
23174 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23177 /* Expand an atomic compare and swap operation. */
/* NOTE(review): lossy extract — return type, braces, the `is_weak`
   declaration, `mem = operands[2]`, and several guard lines were
   dropped between the visible lines.  */
23180 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23182 rtx boolval, retval, mem, oldval, newval, cond;
23183 rtx label1, label2, x, mask, shift;
23184 machine_mode mode, orig_mode;
23185 enum memmodel mod_s, mod_f;
23188 boolval = operands[0];
23189 retval = operands[1];
23191 oldval = operands[3];
23192 newval = operands[4];
23193 is_weak = (INTVAL (operands[5]) != 0);
/* Success and failure memory models for the CAS.  */
23194 mod_s = memmodel_base (INTVAL (operands[6]));
23195 mod_f = memmodel_base (INTVAL (operands[7]));
23196 orig_mode = mode = GET_MODE (mem);
23198 mask = shift = NULL_RTX;
23199 if (mode == QImode || mode == HImode)
23201 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23202 lwarx and shift/mask operations. With power8, we need to do the
23203 comparison in SImode, but the store is still done in QI/HImode. */
23204 oldval = convert_modes (SImode, mode, oldval, 1);
23206 if (!TARGET_SYNC_HI_QI)
23208 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23210 /* Shift and mask OLDVAL into position with the word. */
23211 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23212 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23214 /* Shift and mask NEWVAL into position within the word. */
23215 newval = convert_modes (SImode, mode, newval, 1);
23216 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23217 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23220 /* Prepare to adjust the return value. */
23221 retval = gen_reg_rtx (SImode);
/* Keep OLDVAL from being clobbered by the load into RETVAL.  */
23224 else if (reg_overlap_mentioned_p (retval, oldval))
23225 oldval = copy_to_reg (oldval);
23227 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23228 oldval = copy_to_mode_reg (mode, oldval);
23230 if (reg_overlap_mentioned_p (retval, newval))
23231 newval = copy_to_reg (newval);
23233 mem = rs6000_pre_atomic_barrier (mem, mod_s);
/* Loop head: retry label for the larx/stcx. sequence.  */
23238 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23239 emit_label (XEXP (label1, 0));
23241 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23243 emit_load_locked (mode, retval, mem);
/* Sub-word path: compare only the field of interest.  */
23247 x = expand_simple_binop (SImode, AND, retval, mask,
23248 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23250 cond = gen_reg_rtx (CCmode);
23251 /* If we have TImode, synthesize a comparison. */
23252 if (mode != TImode)
23253 x = gen_rtx_COMPARE (CCmode, x, oldval);
/* TImode: no 128-bit compare insn — XOR the two halves and OR them,
   then compare the OR against zero.  */
23256 rtx xor1_result = gen_reg_rtx (DImode);
23257 rtx xor2_result = gen_reg_rtx (DImode);
23258 rtx or_result = gen_reg_rtx (DImode);
23259 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23260 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23261 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23262 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23264 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23265 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23266 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23267 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23270 emit_insn (gen_rtx_SET (cond, x));
/* Mismatch: bail out of the loop.  */
23272 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23273 emit_unlikely_jump (x, label2);
23277 x = rs6000_mask_atomic_subword (retval, newval, mask);
23279 emit_store_conditional (orig_mode, cond, mem, x);
/* Store-conditional failed (reservation lost): retry.  */
23283 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23284 emit_unlikely_jump (x, label1);
/* Failure label placement depends on whether the failure model needs
   the post barrier.  */
23287 if (!is_mm_relaxed (mod_f))
23288 emit_label (XEXP (label2, 0));
23290 rs6000_post_atomic_barrier (mod_s);
23292 if (is_mm_relaxed (mod_f))
23293 emit_label (XEXP (label2, 0));
23296 rs6000_finish_atomic_subword (operands[1], retval, shift);
23297 else if (mode != GET_MODE (operands[1]))
23298 convert_move (operands[1], retval, 1);
23300 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23301 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23302 emit_insn (gen_rtx_SET (boolval, x));
23305 /* Expand an atomic exchange operation. */
/* NOTE(review): lossy extract — `mem = operands[1]` / `val = operands[2]`
   and some braces were dropped between the visible lines.  */
23308 rs6000_expand_atomic_exchange (rtx operands[])
23310 rtx retval, mem, val, cond;
23312 enum memmodel model;
23313 rtx label, x, mask, shift;
23315 retval = operands[0];
23318 model = memmodel_base (INTVAL (operands[3]));
23319 mode = GET_MODE (mem);
23321 mask = shift = NULL_RTX;
/* Sub-word without lbarx/lharx: do the exchange in an aligned SImode
   word via shift/mask.  */
23322 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23324 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23326 /* Shift and mask VAL into position with the word. */
23327 val = convert_modes (SImode, mode, val, 1);
23328 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23329 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23331 /* Prepare to adjust the return value. */
23332 retval = gen_reg_rtx (SImode);
23336 mem = rs6000_pre_atomic_barrier (mem, model);
/* larx/stcx. retry loop.  */
23338 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23339 emit_label (XEXP (label, 0));
23341 emit_load_locked (mode, retval, mem);
23345 x = rs6000_mask_atomic_subword (retval, val, mask);
23347 cond = gen_reg_rtx (CCmode);
23348 emit_store_conditional (mode, cond, mem, x);
23350 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23351 emit_unlikely_jump (x, label);
23353 rs6000_post_atomic_barrier (model);
/* Sub-word path: extract the old field into the caller's operand.  */
23356 rs6000_finish_atomic_subword (operands[0], retval, shift);
23359 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23360 to perform. MEM is the memory on which to operate. VAL is the second
23361 operand of the binary operator. BEFORE and AFTER are optional locations to
23362 return the value of MEM either before of after the operation. MODEL_RTX
23363 is a CONST_INT containing the memory model to use. */
/* NOTE(review): lossy extract — the switch skeleton over CODE, several
   braces, and some guard lines were dropped between the visible lines.  */
23366 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23367 rtx orig_before, rtx orig_after, rtx model_rtx)
23369 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23370 machine_mode mode = GET_MODE (mem);
23371 machine_mode store_mode = mode;
23372 rtx label, x, cond, mask, shift;
23373 rtx before = orig_before, after = orig_after;
23375 mask = shift = NULL_RTX;
23376 /* On power8, we want to use SImode for the operation. On previous systems,
23377 use the operation in a subword and shift/mask to get the proper byte or
23379 if (mode == QImode || mode == HImode)
23381 if (TARGET_SYNC_HI_QI)
23383 val = convert_modes (SImode, mode, val, 1);
23385 /* Prepare to adjust the return value. */
23386 before = gen_reg_rtx (SImode);
23388 after = gen_reg_rtx (SImode);
23393 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23395 /* Shift and mask VAL into position with the word. */
23396 val = convert_modes (SImode, mode, val, 1);
23397 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23398 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Per-opcode adjustment of VAL so the SImode op leaves the bits
   outside the field untouched (switch labels dropped by extraction).  */
23404 /* We've already zero-extended VAL. That is sufficient to
23405 make certain that it does not affect other bits. */
23410 /* If we make certain that all of the other bits in VAL are
23411 set, that will be sufficient to not affect other bits. */
23412 x = gen_rtx_NOT (SImode, mask);
23413 x = gen_rtx_IOR (SImode, x, val);
23414 emit_insn (gen_rtx_SET (val, x));
23421 /* These will all affect bits outside the field and need
23422 adjustment via MASK within the loop. */
23426 gcc_unreachable ();
23429 /* Prepare to adjust the return value. */
23430 before = gen_reg_rtx (SImode);
23432 after = gen_reg_rtx (SImode);
/* Sub-word path runs the whole loop in SImode.  */
23433 store_mode = mode = SImode;
23437 mem = rs6000_pre_atomic_barrier (mem, model);
/* larx/op/stcx. retry loop.  */
23439 label = gen_label_rtx ();
23440 emit_label (label);
23441 label = gen_rtx_LABEL_REF (VOIDmode, label);
23443 if (before == NULL_RTX)
23444 before = gen_reg_rtx (mode);
23446 emit_load_locked (mode, before, mem);
/* NAND is ~(before & val); other codes map straight to the binop.  */
23450 x = expand_simple_binop (mode, AND, before, val,
23451 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23452 after = expand_simple_unop (mode, NOT, x, after, 1);
23456 after = expand_simple_binop (mode, code, before, val,
23457 after, 1, OPTAB_LIB_WIDEN);
/* Sub-word path: merge the updated field back into the full word.  */
23463 x = expand_simple_binop (SImode, AND, after, mask,
23464 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23465 x = rs6000_mask_atomic_subword (before, x, mask);
23467 else if (store_mode != mode)
23468 x = convert_modes (store_mode, mode, x, 1);
23470 cond = gen_reg_rtx (CCmode);
23471 emit_store_conditional (store_mode, cond, mem, x);
23473 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23474 emit_unlikely_jump (x, label);
23476 rs6000_post_atomic_barrier (model);
23480 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23481 then do the calcuations in a SImode register. */
23483 rs6000_finish_atomic_subword (orig_before, before, shift);
23485 rs6000_finish_atomic_subword (orig_after, after, shift);
23487 else if (store_mode != mode)
23489 /* QImode/HImode on machines with lbarx/lharx where we do the native
23490 operation and then do the calcuations in a SImode register. */
23492 convert_move (orig_before, before, 1);
23494 convert_move (orig_after, after, 1);
23496 else if (orig_after && after != orig_after)
23497 emit_move_insn (orig_after, after);
23500 /* Emit instructions to move SRC to DST. Called by splitters for
23501 multi-register moves. It will emit at most one instruction for
23502 each register that is accessed; that is, it won't emit li/lis pairs
23503 (or equivalent for 64-bit code). One of SRC or DST must be a hard
/* NOTE(review): lossy extract — declarations (reg, mode, nregs, i, j,
   reg_mode_size), several braces and guard lines were dropped between
   the visible lines.  */
23507 rs6000_split_multireg_move (rtx dst, rtx src)
23509 /* The register number of the first register being moved. */
23511 /* The mode that is to be moved. */
23513 /* The mode that the move is being done in, and its size. */
23514 machine_mode reg_mode;
23516 /* The number of registers that will be moved. */
23519 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23520 mode = GET_MODE (dst);
23521 nregs = hard_regno_nregs (reg, mode);
/* Per-register move mode: DDmode/DFmode for FPRs, V16QI for Altivec
   regs, the word mode for GPRs.  */
23522 if (FP_REGNO_P (reg))
23523 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23524 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23525 else if (ALTIVEC_REGNO_P (reg))
23526 reg_mode = V16QImode;
23528 reg_mode = word_mode;
23529 reg_mode_size = GET_MODE_SIZE (reg_mode);
23531 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23533 /* TDmode residing in FP registers is special, since the ISA requires that
23534 the lower-numbered word of a register pair is always the most significant
23535 word, even in little-endian mode. This does not match the usual subreg
23536 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
23537 the appropriate constituent registers "by hand" in little-endian mode.
23539 Note we do not need to check for destructive overlap here since TDmode
23540 can only reside in even/odd register pairs. */
23541 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23546 for (i = 0; i < nregs; i++)
/* FPR side: address the constituent registers directly, reversed.  */
23548 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23549 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23551 p_src = simplify_gen_subreg (reg_mode, src, mode,
23552 i * reg_mode_size);
23554 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23555 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23557 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23558 i * reg_mode_size);
23560 emit_insn (gen_rtx_SET (p_dst, p_src));
23566 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23568 /* Move register range backwards, if we might have destructive
23571 for (i = nregs - 1; i >= 0; i--)
23572 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23573 i * reg_mode_size),
23574 simplify_gen_subreg (reg_mode, src, mode,
23575 i * reg_mode_size)));
/* General case: one of SRC/DST may be memory with an auto-modified or
   non-offsettable address that must be legitimized first.  */
23581 bool used_update = false;
23582 rtx restore_basereg = NULL_RTX;
23584 if (MEM_P (src) && INT_REGNO_P (reg))
/* Pre-inc/dec load: apply the increment up front, then address plainly.  */
23588 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23589 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23592 breg = XEXP (XEXP (src, 0), 0);
23593 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23594 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23595 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23596 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23597 src = replace_equiv_address (src, breg);
23599 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23601 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23603 rtx basereg = XEXP (XEXP (src, 0), 0);
/* Fold the base update into the first load when possible
   (load-with-update); the guard was dropped by extraction.  */
23606 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23607 emit_insn (gen_rtx_SET (ndst,
23608 gen_rtx_MEM (reg_mode,
23610 used_update = true;
23613 emit_insn (gen_rtx_SET (basereg,
23614 XEXP (XEXP (src, 0), 1)));
23615 src = replace_equiv_address (src, basereg);
/* Otherwise compute the address into the first destination GPR.  */
23619 rtx basereg = gen_rtx_REG (Pmode, reg);
23620 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23621 src = replace_equiv_address (src, basereg);
23625 breg = XEXP (src, 0);
23626 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23627 breg = XEXP (breg, 0);
23629 /* If the base register we are using to address memory is
23630 also a destination reg, then change that register last. */
23632 && REGNO (breg) >= REGNO (dst)
23633 && REGNO (breg) < REGNO (dst) + nregs
23634 j = REGNO (breg) - REGNO (dst);
23636 else if (MEM_P (dst) && INT_REGNO_P (reg))
/* Pre-inc/dec store side.  */
23640 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23641 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23644 breg = XEXP (XEXP (dst, 0), 0);
23645 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23646 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23647 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23649 /* We have to update the breg before doing the store.
23650 Use store with update, if available. */
23654 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23655 emit_insn (TARGET_32BIT
23656 ? (TARGET_POWERPC64
23657 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23658 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
23659 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23660 used_update = true;
23663 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23664 dst = replace_equiv_address (dst, breg);
23666 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23667 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23669 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23671 rtx basereg = XEXP (XEXP (dst, 0), 0);
23674 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23675 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23678 used_update = true;
23681 emit_insn (gen_rtx_SET (basereg,
23682 XEXP (XEXP (dst, 0), 1)));
23683 dst = replace_equiv_address (dst, basereg);
/* reg+reg store address: fold the index into the base, remembering
   how to undo it afterwards (restore_basereg).  */
23687 rtx basereg = XEXP (XEXP (dst, 0), 0);
23688 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23689 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23691 && REG_P (offsetreg)
23692 && REGNO (basereg) != REGNO (offsetreg));
/* r0 as a base reads as zero in D-form addressing — swap it out.  */
23693 if (REGNO (basereg) == 0)
23695 rtx tmp = offsetreg;
23696 offsetreg = basereg;
23699 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23700 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23701 dst = replace_equiv_address (dst, basereg);
23704 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23705 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
/* Main loop: one reg_mode move per hard register, ordered via j so the
   base register (if it overlaps DST) is written last.  */
23708 for (i = 0; i < nregs; i++)
23710 /* Calculate index to next subword. */
23715 /* If compiler already emitted move of first word by
23716 store with update, no need to do anything. */
23717 if (j == 0 && used_update)
23720 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23721 j * reg_mode_size),
23722 simplify_gen_subreg (reg_mode, src, mode,
23723 j * reg_mode_size)));
23725 if (restore_basereg != NULL_RTX)
23726 emit_insn (restore_basereg);
/* Cached alias set for the TOC; GTY-marked so it survives GC.  */
23730 static GTY(()) alias_set_type set = -1;
/* Return (lazily creating) the alias set used for TOC references.
   The `if (set == -1)` guard and the return were dropped by the
   extraction.  */
23733 get_TOC_alias_set (void)
23736 set = new_alias_set ();
23740 /* Return the internal arg pointer used for function incoming
23741 arguments. When -fsplit-stack, the arg pointer is r12 so we need
23742 to copy it to a pseudo in order for it to be preserved over calls
23743 and suchlike. We'd really like to use a pseudo here for the
23744 internal arg pointer but data-flow analysis is not prepared to
23745 accept pseudos as live at the beginning of a function. */
23748 rs6000_internal_arg_pointer (void)
23750 if (flag_split_stack
23751 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
/* Create the pseudo once per function and cache it in cfun->machine.  */
23755 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
23759 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
23760 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
23762 /* Put the pseudo initialization right after the note at the
23763 beginning of the function. */
23764 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
23765 gen_rtx_REG (Pmode, 12));
23766 push_topmost_sequence ();
23767 emit_insn_after (pat, get_insns ());
23768 pop_topmost_sequence ();
/* Adjust by FIRST_PARM_OFFSET so it points at the first parameter.  */
23770 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
23771 FIRST_PARM_OFFSET (current_function_decl));
23772 return copy_to_reg (ret);
/* Non-split-stack: the usual virtual incoming-args pointer.  */
23774 return virtual_incoming_args_rtx;
23777 /* We may have to tell the dataflow pass that the split stack prologue
23778 is initializing a register. */
23781 rs6000_live_on_entry (bitmap regs)
/* r12 carries the arg pointer under -fsplit-stack; mark it live.  */
23783 if (flag_split_stack)
23784 bitmap_set_bit (regs, 12);
23788 /* A C compound statement that outputs the assembler code for a thunk
23789 function, used to implement C++ virtual function calls with
23790 multiple inheritance. The thunk acts as a wrapper around a virtual
23791 function, adjusting the implicit object parameter before handing
23792 control off to the real function.
23794 First, emit code to add the integer DELTA to the location that
23795 contains the incoming first argument. Assume that this argument
23796 contains a pointer, and is the one used to pass the `this' pointer
23797 in C++. This is the incoming argument *before* the function
23798 prologue, e.g. `%o0' on a sparc. The addition must preserve the
23799 values of all other incoming arguments.
23801 After the addition, emit code to jump to FUNCTION, which is a
23802 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
23803 not touch the return address. Hence returning from FUNCTION will
23804 return to whoever called the current `thunk'.
23806 The effect must be as if FUNCTION had been called directly with the
23807 adjusted first argument. This macro is responsible for emitting
23808 all of the code for a thunk function; output_function_prologue()
23809 and output_function_epilogue() are not invoked.
23811 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
23812 been extracted from it.) It might possibly be useful on some
23813 targets, but probably not.
23815 If you do not define this macro, the target-independent code in the
23816 C++ frontend will generate a less efficient heavyweight thunk that
23817 calls FUNCTION instead of jumping to it. The generic approach does
23818 not support varargs. */
/* NOTE(review): this extract is missing interior lines of the function
   (return type, several `else' arms, and closing braces), so the comments
   below annotate only what is visible.  */
23821 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
23822 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
23825 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23826 rtx this_rtx, funexp;
/* Pretend register allocation and the epilogue are done so that the RTL
   emitted here uses hard registers directly; both flags are cleared again
   at the end of the function.  */
23829 reload_completed = 1;
23830 epilogue_completed = 1;
23832 /* Mark the end of the (empty) prologue. */
23833 emit_note (NOTE_INSN_PROLOGUE_END);
23835 /* Find the "this" pointer. If the function returns a structure,
23836 the structure return pointer is in r3. */
23837 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
23838 this_rtx = gen_rtx_REG (Pmode, 4)
23840 this_rtx = gen_rtx_REG (Pmode, 3);
23842 /* Apply the constant offset, if required. */
23844 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
23846 /* Apply the offset from the vtable, if required. */
23849 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
23850 rtx tmp = gen_rtx_REG (Pmode, 12);
/* Load the vtable pointer (the first word at *this).  */
23852 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
/* A vcall offset outside the signed-16-bit range cannot be used as a
   displacement directly, so add it into TMP first.  */
23853 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
23855 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
23856 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
/* Small offsets can be folded into the memory address.  */
23860 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
23862 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
/* Adjust `this' by the value fetched from the vtable.  */
23864 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
23867 /* Generate a tail call to the target function. */
23868 if (!TREE_USED (function))
23870 assemble_external (function);
23871 TREE_USED (function) = 1;
23873 funexp = XEXP (DECL_RTL (function), 0);
23874 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
/* Darwin PIC: route the call through an indirection stub.  */
23877 if (MACHOPIC_INDIRECT)
23878 funexp = machopic_indirect_call_target (funexp);
23881 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
23882 generate sibcall RTL explicitly. */
23883 insn = emit_call_insn (
23884 gen_rtx_PARALLEL (VOIDmode,
23886 gen_rtx_CALL (VOIDmode,
23887 funexp, const0_rtx),
23888 gen_rtx_USE (VOIDmode, const0_rtx),
23889 simple_return_rtx)));
23890 SIBLING_CALL_P (insn) = 1;
23893 /* Run just enough of rest_of_compilation to get the insns emitted.
23894 There's not really enough bulk here to make other passes such as
23895 instruction scheduling worth while. */
23896 insn = get_insns ();
23897 shorten_branches (insn);
23898 assemble_start_function (thunk_fndecl, fnname);
23899 final_start_function (insn, file, 1);
23900 final (insn, file, 1);
23901 final_end_function ();
23902 assemble_end_function (thunk_fndecl, fnname);
/* Restore the compiler-state flags faked above.  */
23904 reload_completed = 0;
23905 epilogue_completed = 0;
23908 /* A quick summary of the various types of 'constant-pool tables'
23911 Target Flags Name One table per
23912 AIX (none) AIX TOC object file
23913 AIX -mfull-toc AIX TOC object file
23914 AIX -mminimal-toc AIX minimal TOC translation unit
23915 SVR4/EABI (none) SVR4 SDATA object file
23916 SVR4/EABI -fpic SVR4 pic object file
23917 SVR4/EABI -fPIC SVR4 PIC translation unit
23918 SVR4/EABI -mrelocatable EABI TOC function
23919 SVR4/EABI -maix AIX TOC object file
23920 SVR4/EABI -maix -mminimal-toc
23921 AIX minimal TOC translation unit
23923 Name Reg. Set by entries contains:
23924 made by addrs? fp? sum?
23926 AIX TOC 2 crt0 as Y option option
23927 AIX minimal TOC 30 prolog gcc Y Y option
23928 SVR4 SDATA 13 crt0 gcc N Y N
23929 SVR4 pic 30 prolog ld Y not yet N
23930 SVR4 PIC 30 prolog gcc Y option option
23931 EABI TOC 30 prolog gcc Y option option
23935 /* Hash functions for the hash table. */
/* Compute a hash value for constant-pool entry K.  The hash mixes the rtx
   code, machine mode, and every operand (recursively for sub-rtxes).
   NOTE(review): several lines of this function (the `switch' header, some
   `case' labels, and braces) are missing from this extract; the multipliers
   613 and 1231 are just odd mixing constants.  */
23938 rs6000_hash_constant (rtx k)
23940 enum rtx_code code = GET_CODE (k);
23941 machine_mode mode = GET_MODE (k);
23942 unsigned result = (code << 3) ^ mode;
23943 const char *format;
23946 format = GET_RTX_FORMAT (code);
23947 flen = strlen (format);
/* Labels hash by insn UID (their operand is a code_label insn).  */
23953 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
23955 case CONST_WIDE_INT:
23958 flen = CONST_WIDE_INT_NUNITS (k);
23959 for (i = 0; i < flen; i++)
23960 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
/* Floating-point constants hash via the generic real_hash.  */
23965 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
/* Generic case: walk the operand format string and fold each operand
   into the hash according to its kind.  */
23975 for (; fidx < flen; fidx++)
23976 switch (format[fidx])
23981 const char *str = XSTR (k, fidx);
23982 len = strlen (str);
23983 result = result * 613 + len;
23984 for (i = 0; i < len; i++)
23985 result = result * 613 + (unsigned) str[i];
/* Sub-expression: recurse.  */
23990 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
23994 result = result * 613 + (unsigned) XINT (k, fidx);
/* Wide ints are folded 32 bits at a time when `unsigned' is narrower
   than HOST_WIDE_INT.  */
23997 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
23998 result = result * 613 + (unsigned) XWINT (k, fidx);
24002 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
24003 result = result * 613 + (unsigned) (XWINT (k, fidx)
24010 gcc_unreachable ();
/* Hash a TOC-table entry: combine the hash of the constant with its mode,
   mirroring the (key, key_mode) pair tested in toc_hasher::equal.  */
24017 toc_hasher::hash (toc_hash_struct *thc)
24019 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
24022 /* Compare H1 and H2 for equivalence. */
/* NOTE(review): the lines binding r1/r2 to the keys of H1/H2 are missing
   from this extract; entries are equal when modes match and the key rtxes
   compare equal.  */
24025 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
24030 if (h1->key_mode != h2->key_mode)
24033 return rtx_equal_p (r1, r2);
24036 /* These are the names given by the C++ front-end to vtables, and
24037 vtable-like objects. Ideally, this logic should not be here;
24038 instead, there should be some programmatic way of inquiring as
24039 to whether or not an object is a vtable. */
/* NOTE(review): the macro body tests the local variable `name', not its
   parameter NAME — every caller in this file passes a local named `name',
   so this works, but the parameter is effectively decorative.  The prefixes
   are the Itanium C++ ABI manglings for vtables (_ZTV), VTTs (_ZTT),
   typeinfo (_ZTI), construction vtables (_ZTC), plus the old "_vt." form.  */
24041 #define VTABLE_NAME_P(NAME) \
24042 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
24043 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
24044 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
24045 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
24046 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
24048 #ifdef NO_DOLLAR_IN_LABEL
24049 /* Return a GGC-allocated character string translating dollar signs in
24050 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
/* NOTE(review): local declarations (p, q, strip, len) and the loop body
   that writes the '_' are not visible in this extract.  */
24053 rs6000_xcoff_strip_dollar (const char *name)
24059 q = (const char *) strchr (name, '$');
/* No '$' (or a leading one we leave alone): return NAME unchanged —
   presumably; the early-return statement itself is not visible here.  */
24061 if (q == 0 || q == name)
24064 len = strlen (name);
/* Work on a stack copy, replacing each '$' in turn.  */
24065 strip = XALLOCAVEC (char, len + 1);
24066 strcpy (strip, name);
24067 p = strip + (q - name);
24071 p = strchr (p + 1, '$');
/* Hand back a GC-managed copy of the rewritten name.  */
24074 return ggc_alloc_string (strip, len);
/* Output the assembler name for SYMBOL_REF X to FILE, special-casing
   vtable symbols (see VTABLE_NAME_P above).  */
24079 rs6000_output_symbol_ref (FILE *file, rtx x)
24081 const char *name = XSTR (x, 0);
24083 /* Currently C++ toc references to vtables can be emitted before it
24084 is decided whether the vtable is public or private. If this is
24085 the case, then the linker will eventually complain that there is
24086 a reference to an unknown section. Thus, for vtables only,
24087 we emit the TOC reference to reference the identifier and not the
24089 if (VTABLE_NAME_P (name))
24091 RS6000_OUTPUT_BASENAME (file, name);
/* Ordinary symbols go through the generic assembler-name machinery.  */
24094 assemble_name (file, name);
24097 /* Output a TOC entry. We derive the entry name from what is being
/* NOTE(review): many interior lines of this function (braces, `else'
   arms, local declarations such as `buf' and `base', and some `case'
   labels) are missing from this extract; comments below annotate only
   the visible logic.  */
24101 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
24104 const char *name = buf;
24106 HOST_WIDE_INT offset = 0;
24108 gcc_assert (!TARGET_NO_TOC);
24110 /* When the linker won't eliminate them, don't output duplicate
24111 TOC entries (this happens on AIX if there is any kind of TOC,
24112 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
24114 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
24116 struct toc_hash_struct *h;
24118 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
24119 time because GGC is not initialized at that point. */
24120 if (toc_hash_table == NULL)
24121 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
24123 h = ggc_alloc<toc_hash_struct> ();
24125 h->key_mode = mode;
24126 h->labelno = labelno;
24128 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
24129 if (*found == NULL)
24131 else /* This is indeed a duplicate.
24132 Set this label equal to that label. */
/* Emit ".set LCnew,LCold" so the duplicate label aliases the first
   occurrence's TOC entry instead of emitting a second entry.  */
24134 fputs ("\t.set ", file);
24135 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24136 fprintf (file, "%d,", labelno);
24137 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24138 fprintf (file, "%d\n", ((*found)->labelno));
/* XCOFF TLS global/local-dynamic entries carry a companion LCM label,
   which must be aliased for duplicates as well.  */
24141 if (TARGET_XCOFF && SYMBOL_REF_P (x)
24142 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
24143 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
24145 fputs ("\t.set ", file);
24146 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24147 fprintf (file, "%d,", labelno);
24148 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24149 fprintf (file, "%d\n", ((*found)->labelno));
24156 /* If we're going to put a double constant in the TOC, make sure it's
24157 aligned properly when strict alignment is on. */
24158 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
24159 && STRICT_ALIGNMENT
24160 && GET_MODE_BITSIZE (mode) >= 64
24161 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
24162 ASM_OUTPUT_ALIGN (file, 3);
24165 (*targetm.asm_out.internal_label) (file, "LC", labelno);
24167 /* Handle FP constants specially. Note that if we have a minimal
24168 TOC, things we put here aren't actually in the TOC, so we can allow
/* 128-bit floats (IBM double-double and IEEE quad, plus their decimal
   counterpart TDmode): emit four 32-bit words.  */
24170 if (CONST_DOUBLE_P (x)
24171 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
24172 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
24176 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24177 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
24179 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
/* 64-bit pointers: two doubleword entries, word order fixed up for
   endianness.  The FT_ name encodes the bit pattern so that identical
   constants share an AIX TOC entry.  */
24183 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24184 fputs (DOUBLE_INT_ASM_OP, file);
24186 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24187 k[0] & 0xffffffff, k[1] & 0xffffffff,
24188 k[2] & 0xffffffff, k[3] & 0xffffffff);
24189 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
24190 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24191 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
24192 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
24193 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
/* 32-bit pointers: four .long words in storage order.  */
24198 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24199 fputs ("\t.long ", file);
24201 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24202 k[0] & 0xffffffff, k[1] & 0xffffffff,
24203 k[2] & 0xffffffff, k[3] & 0xffffffff);
24204 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
24205 k[0] & 0xffffffff, k[1] & 0xffffffff,
24206 k[2] & 0xffffffff, k[3] & 0xffffffff);
/* 64-bit floats (binary DFmode or decimal DDmode): two 32-bit words.  */
24210 else if (CONST_DOUBLE_P (x)
24211 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
24215 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24216 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
24218 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24222 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24223 fputs (DOUBLE_INT_ASM_OP, file);
24225 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24226 k[0] & 0xffffffff, k[1] & 0xffffffff);
24227 fprintf (file, "0x%lx%08lx\n",
24228 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24229 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
24234 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24235 fputs ("\t.long ", file);
24237 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24238 k[0] & 0xffffffff, k[1] & 0xffffffff);
24239 fprintf (file, "0x%lx,0x%lx\n",
24240 k[0] & 0xffffffff, k[1] & 0xffffffff);
/* 32-bit floats (binary SFmode or decimal SDmode): one word, padded to
   a doubleword on big-endian 64-bit targets.  */
24244 else if (CONST_DOUBLE_P (x)
24245 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
24249 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24250 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
24252 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
24256 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24257 fputs (DOUBLE_INT_ASM_OP, file);
24259 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24260 if (WORDS_BIG_ENDIAN)
24261 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
24263 fprintf (file, "0x%lx\n", l & 0xffffffff);
24268 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24269 fputs ("\t.long ", file);
24271 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24272 fprintf (file, "0x%lx\n", l & 0xffffffff);
/* Plain integer constants.  */
24276 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
24278 unsigned HOST_WIDE_INT low;
24279 HOST_WIDE_INT high;
24281 low = INTVAL (x) & 0xffffffff;
24282 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
24284 /* TOC entries are always Pmode-sized, so when big-endian
24285 smaller integer constants in the TOC need to be padded.
24286 (This is still a win over putting the constants in
24287 a separate constant pool, because then we'd have
24288 to have both a TOC entry _and_ the actual constant.)
24290 For a 32-bit target, CONST_INT values are loaded and shifted
24291 entirely within `low' and can be stored in one TOC entry. */
24293 /* It would be easy to make this work, but it doesn't now. */
24294 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
24296 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
/* Left-justify the value within the Pmode-sized slot.  */
24299 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
24300 high = (HOST_WIDE_INT) low >> 32;
24306 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24307 fputs (DOUBLE_INT_ASM_OP, file);
24309 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24310 (long) high & 0xffffffff, (long) low & 0xffffffff);
24311 fprintf (file, "0x%lx%08lx\n",
24312 (long) high & 0xffffffff, (long) low & 0xffffffff);
24317 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
24319 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24320 fputs ("\t.long ", file);
24322 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24323 (long) high & 0xffffffff, (long) low & 0xffffffff);
24324 fprintf (file, "0x%lx,0x%lx\n",
24325 (long) high & 0xffffffff, (long) low & 0xffffffff);
24329 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24330 fputs ("\t.long ", file);
24332 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
24333 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
/* Addresses: strip a CONST wrapper of the form (symbol + offset),
   then name the entry after the base symbol or label.  */
24339 if (GET_CODE (x) == CONST)
24341 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
24342 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
24344 base = XEXP (XEXP (x, 0), 0);
24345 offset = INTVAL (XEXP (XEXP (x, 0), 1));
24348 switch (GET_CODE (base))
24351 name = XSTR (base, 0);
24355 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
24356 CODE_LABEL_NUMBER (XEXP (base, 0)));
24360 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
24364 gcc_unreachable ();
24367 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24368 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
24371 fputs ("\t.tc ", file);
24372 RS6000_OUTPUT_BASENAME (file, name);
/* Encode the offset into the TC entry name: ".N<abs>" for negative,
   ".P<offset>" for positive, so distinct offsets get distinct entries.  */
24375 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
24377 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
24379 /* Mark large TOC symbols on AIX with [TE] so they are mapped
24380 after other TOC symbols, reducing overflow of small TOC access
24381 to [TC] symbols. */
24382 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
24383 ? "[TE]," : "[TC],", file);
24386 /* Currently C++ toc references to vtables can be emitted before it
24387 is decided whether the vtable is public or private. If this is
24388 the case, then the linker will eventually complain that there is
24389 a TOC reference to an unknown section. Thus, for vtables only,
24390 we emit the TOC reference to reference the symbol and not the
24392 if (VTABLE_NAME_P (name))
24394 RS6000_OUTPUT_BASENAME (file, name);
24396 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
24397 else if (offset > 0)
24398 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
24401 output_addr_const (file, x);
/* XCOFF TLS decoration on the entry's value expression.  */
24404 if (TARGET_XCOFF && SYMBOL_REF_P (base))
24406 switch (SYMBOL_REF_TLS_MODEL (base))
24410 case TLS_MODEL_LOCAL_EXEC:
24411 fputs ("@le", file);
24413 case TLS_MODEL_INITIAL_EXEC:
24414 fputs ("@ie", file);
24416 /* Use global-dynamic for local-dynamic. */
24417 case TLS_MODEL_GLOBAL_DYNAMIC:
24418 case TLS_MODEL_LOCAL_DYNAMIC:
/* Dynamic TLS needs a second (module) entry under an LCM label.  */
24420 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
24421 fputs ("\t.tc .", file);
24422 RS6000_OUTPUT_BASENAME (file, name);
24423 fputs ("[TC],", file);
24424 output_addr_const (file, x);
24425 fputs ("@m", file);
24428 gcc_unreachable ();
24436 /* Output an assembler pseudo-op to write an ASCII string of N characters
24437 starting at P to FILE.
24439 On the RS/6000, we have to do this using the .byte operation and
24440 write out special characters outside the quoted string.
24441 Also, the assembler is broken; very long strings are truncated,
24442 so we must artificially break them up early. */
/* NOTE(review): some lines (the `char c' declaration, increments of
   count_string, assignments to to_close) are missing from this extract.
   The for_string/for_decimal/to_close strings act as a small state
   machine deciding what separator to emit before the next character.  */
24445 output_ascii (FILE *file, const char *p, int n)
24448 int i, count_string;
24449 const char *for_string = "\t.byte \"";
24450 const char *for_decimal = "\t.byte ";
24451 const char *to_close = NULL;
24454 for (i = 0; i < n; i++)
/* Printable ASCII (space .. 0x7e) goes inside a quoted ".byte" string.  */
24457 if (c >= ' ' && c < 0177)
24460 fputs (for_string, file);
24463 /* Write two quotes to get one. */
24471 for_decimal = "\"\n\t.byte ";
24475 if (count_string >= 512)
/* Break the quoted string early to dodge assembler truncation.  */
24477 fputs (to_close, file);
24479 for_string = "\t.byte \"";
24480 for_decimal = "\t.byte ";
/* Non-printable bytes are emitted as decimal .byte operands.  */
24488 fputs (for_decimal, file);
24489 fprintf (file, "%d", c);
24491 for_string = "\n\t.byte \"";
24492 for_decimal = ", ";
24498 /* Now close the string if we have written one. Then end the line. */
24500 fputs (to_close, file);
24503 /* Generate a unique section name for FILENAME for a section type
24504 represented by SECTION_DESC. Output goes into BUF.
24506 SECTION_DESC can be any string, as long as it is different for each
24507 possible section type.
24509 We name the section in the same manner as xlc. The name begins with an
24510 underscore followed by the filename (after stripping any leading directory
24511 names) with the last period replaced by the string SECTION_DESC. If
24512 FILENAME does not contain a period, SECTION_DESC is appended to the end of
/* NOTE(review): lines writing the leading '_', copying alphanumeric
   characters, and terminating the buffer are missing from this extract.
   *BUF is xmalloc'd here and owned by the caller.  */
24516 rs6000_gen_section_name (char **buf, const char *filename,
24517 const char *section_desc)
24519 const char *q, *after_last_slash, *last_period = 0;
/* First pass: find the basename start and the last '.' within it.  */
24523 after_last_slash = filename;
24524 for (q = filename; *q; q++)
24527 after_last_slash = q + 1;
24528 else if (*q == '.')
/* +2 covers the leading underscore and the NUL terminator.  */
24532 len = strlen (after_last_slash) + strlen (section_desc) + 2;
24533 *buf = (char *) xmalloc (len);
/* Second pass: copy the basename, splicing SECTION_DESC in place of the
   final period and dropping non-alphanumeric characters.  */
24538 for (q = after_last_slash; *q; q++)
24540 if (q == last_period)
24542 strcpy (p, section_desc);
24543 p += strlen (section_desc);
24547 else if (ISALNUM (*q))
/* No period found: append SECTION_DESC at the end instead.  */
24551 if (last_period == 0)
24552 strcpy (p, section_desc);
24557 /* Emit profile function. */
/* Emit the RTL call to the profiling routine (_mcount) appropriate for
   the current ABI.  NOTE(review): several lines (the early return for
   TARGET_PROFILE_KERNEL, `rtx fun', `char buf[30]', and closing braces)
   are missing from this extract.  */
24560 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
24562 /* Non-standard profiling for kernels, which just saves LR then calls
24563 _mcount without worrying about arg saves. The idea is to change
24564 the function prologue as little as possible as it isn't easy to
24565 account for arg save/restore code added just for _mcount. */
24566 if (TARGET_PROFILE_KERNEL)
24569 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24571 #ifndef NO_PROFILE_COUNTERS
24572 # define NO_PROFILE_COUNTERS 0
/* Without per-call-site counters, _mcount takes no argument.  */
24574 if (NO_PROFILE_COUNTERS)
24575 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24576 LCT_NORMAL, VOIDmode);
/* Otherwise pass the address of a local counter label (LP<n>).  */
24580 const char *label_name;
24583 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24584 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
24585 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
24587 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24588 LCT_NORMAL, VOIDmode, fun, Pmode);
24591 else if (DEFAULT_ABI == ABI_DARWIN)
24593 const char *mcount_name = RS6000_MCOUNT;
24594 int caller_addr_regno = LR_REGNO;
24596 /* Be conservative and always set this, at least for now. */
24597 crtl->uses_pic_offset_table = 1;
24600 /* For PIC code, set up a stub and collect the caller's address
24601 from r0, which is where the prologue puts it. */
24602 if (MACHOPIC_INDIRECT
24603 && crtl->uses_pic_offset_table)
24604 caller_addr_regno = 0;
/* Darwin's mcount receives the caller's return address.  */
24606 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
24607 LCT_NORMAL, VOIDmode,
24608 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
24612 /* Write function profiler code. */
/* Emit textual assembler (not RTL) for the profiling call sequence.
   NOTE(review): `char buf[100]', several `case ABI_*:' labels, `break's,
   and closing braces are missing from this extract; only the V4 32-bit
   paths are substantially visible.  */
24615 output_function_profiler (FILE *file, int labelno)
24619 switch (DEFAULT_ABI)
24622 gcc_unreachable ();
/* V4 profiling of 64-bit code is unsupported.  */
24627 warning (0, "no profiling of 64-bit code for this ABI");
24630 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
/* Save LR: _mcount expects the caller's return address available.  */
24631 fprintf (file, "\tmflr %s\n", reg_names[0]);
24632 if (NO_PROFILE_COUNTERS)
24634 asm_fprintf (file, "\tstw %s,4(%s)\n",
24635 reg_names[0], reg_names[1]);
24637 else if (TARGET_SECURE_PLT && flag_pic)
/* Materialize the counter address PC-relatively via a bcl/mflr pair
   (or the 476 link-stack thunk, which avoids corrupting the branch
   predictor's link stack).  */
24639 if (TARGET_LINK_STACK)
24642 get_ppc476_thunk_name (name);
24643 asm_fprintf (file, "\tbl %s\n", name);
24646 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
24647 asm_fprintf (file, "\tstw %s,4(%s)\n",
24648 reg_names[0], reg_names[1]);
24649 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24650 asm_fprintf (file, "\taddis %s,%s,",
24651 reg_names[12], reg_names[12]);
24652 assemble_name (file, buf);
24653 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
24654 assemble_name (file, buf);
24655 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
/* Small-model PIC: load the counter address from the GOT.  */
24657 else if (flag_pic == 1)
24659 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
24660 asm_fprintf (file, "\tstw %s,4(%s)\n",
24661 reg_names[0], reg_names[1]);
24662 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24663 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
24664 assemble_name (file, buf);
24665 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
24667 else if (flag_pic > 1)
24669 asm_fprintf (file, "\tstw %s,4(%s)\n",
24670 reg_names[0], reg_names[1]);
24671 /* Now, we need to get the address of the label. */
24672 if (TARGET_LINK_STACK)
24675 get_ppc476_thunk_name (name);
24676 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
24677 assemble_name (file, buf);
24678 fputs ("-.\n1:", file);
24679 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24680 asm_fprintf (file, "\taddi %s,%s,4\n",
24681 reg_names[11], reg_names[11]);
24685 fputs ("\tbcl 20,31,1f\n\t.long ", file);
24686 assemble_name (file, buf);
24687 fputs ("-.\n1:", file);
24688 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
/* The inline word holds `buf - .'; add it to the PC copy in r11.  */
24690 asm_fprintf (file, "\tlwz %s,0(%s)\n",
24691 reg_names[0], reg_names[11]);
24692 asm_fprintf (file, "\tadd %s,%s,%s\n",
24693 reg_names[0], reg_names[0], reg_names[11]);
/* Non-PIC: address the counter with absolute @ha/@l relocations.  */
24697 asm_fprintf (file, "\tlis %s,", reg_names[12]);
24698 assemble_name (file, buf);
24699 fputs ("@ha\n", file);
24700 asm_fprintf (file, "\tstw %s,4(%s)\n",
24701 reg_names[0], reg_names[1]);
24702 asm_fprintf (file, "\tla %s,", reg_names[0]);
24703 assemble_name (file, buf);
24704 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
24707 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
24708 fprintf (file, "\tbl %s%s\n",
24709 RS6000_MCOUNT, flag_pic ? "@plt" : "");
24715 /* Don't do anything, done in output_profile_hook (). */
/* File-scope state used by the instruction-scheduling hooks below.
   NOTE(review): where these are reset between scheduling regions is not
   visible in this extract.  */
24722 /* The following variable value is the last issued insn. */
24724 static rtx_insn *last_scheduled_insn;
24726 /* The following variable helps to balance issuing of load and
24727 store instructions */
24729 static int load_store_pendulum;
24731 /* The following variable helps pair divide insns during scheduling. */
24732 static int divide_cnt;
24733 /* The following variable helps pair and alternate vector and vector load
24734 insns during scheduling. */
24735 static int vec_pairing;
24738 /* Power4 load update and store update instructions are cracked into a
24739 load or store and an integer insn which are executed in the same cycle.
24740 Branches have their own dispatch slot which does not count against the
24741 GCC issue rate, but it changes the program flow so there are no other
24742 instructions to issue in this cycle. */
/* Compute how many more insns can issue this cycle after INSN, caching
   the answer in cached_can_issue_more.  NOTE(review): some lines (braces,
   a `return' for the recog_memoized < 0 path, the final return) are
   missing from this extract.  */
24745 rs6000_variable_issue_1 (rtx_insn *insn, int more)
24747 last_scheduled_insn = insn;
/* USE/CLOBBER markers consume no issue slot.  */
24748 if (GET_CODE (PATTERN (insn)) == USE
24749 || GET_CODE (PATTERN (insn)) == CLOBBER)
24751 cached_can_issue_more = more;
24752 return cached_can_issue_more;
/* An insn that ends the current dispatch group leaves no slots.  */
24755 if (insn_terminates_group_p (insn, current_group))
24757 cached_can_issue_more = 0;
24758 return cached_can_issue_more;
24761 /* If no reservation, but reach here */
24762 if (recog_memoized (insn) < 0)
24765 if (rs6000_sched_groups)
/* Group-based dispatch (Power4/5): microcoded insns fill the group,
   cracked insns take two slots, everything else one.  */
24767 if (is_microcoded_insn (insn))
24768 cached_can_issue_more = 0;
24769 else if (is_cracked_insn (insn))
24770 cached_can_issue_more = more > 2 ? more - 2 : 0;
24772 cached_can_issue_more = more - 1;
24774 return cached_can_issue_more;
24777 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
24780 cached_can_issue_more = more - 1;
24781 return cached_can_issue_more;
/* TARGET_SCHED_VARIABLE_ISSUE hook: wrapper around rs6000_variable_issue_1
   that traces the result to STREAM when scheduler dumping is enabled.
   NOTE(review): the `return r;' and closing brace are not visible here.  */
24785 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
24787 int r = rs6000_variable_issue_1 (insn, more);
24789 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
24793 /* Adjust the cost of a scheduling dependency. Return the new cost of
24794 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* TARGET_SCHED_ADJUST_COST hook.  NOTE(review): many interior lines
   (the outer `switch (dep_type)', numerous `case'/`break' lines, return
   statements, and braces) are missing from this extract; the comments
   below annotate only the visible fragments.  */
24797 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
24800 enum attr_type attr_type;
/* Without recognized insns there are no attributes to consult.  */
24802 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
24809 /* Data dependency; DEP_INSN writes a register that INSN reads
24810 some cycles later. */
24812 /* Separate a load from a narrower, dependent store. */
24813 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
24814 || rs6000_tune == PROCESSOR_FUTURE)
24815 && GET_CODE (PATTERN (insn)) == SET
24816 && GET_CODE (PATTERN (dep_insn)) == SET
24817 && MEM_P (XEXP (PATTERN (insn), 1))
24818 && MEM_P (XEXP (PATTERN (dep_insn), 0))
24819 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
24820 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
24823 attr_type = get_attr_type (insn);
24828 /* Tell the first scheduling pass about the latency between
24829 a mtctr and bctr (and mtlr and br/blr). The first
24830 scheduling pass will not know about this latency since
24831 the mtctr instruction, which has the latency associated
24832 to it, will be generated by reload. */
24835 /* Leave some extra cycles between a compare and its
24836 dependent branch, to inhibit expensive mispredicts. */
24837 if ((rs6000_tune == PROCESSOR_PPC603
24838 || rs6000_tune == PROCESSOR_PPC604
24839 || rs6000_tune == PROCESSOR_PPC604e
24840 || rs6000_tune == PROCESSOR_PPC620
24841 || rs6000_tune == PROCESSOR_PPC630
24842 || rs6000_tune == PROCESSOR_PPC750
24843 || rs6000_tune == PROCESSOR_PPC7400
24844 || rs6000_tune == PROCESSOR_PPC7450
24845 || rs6000_tune == PROCESSOR_PPCE5500
24846 || rs6000_tune == PROCESSOR_PPCE6500
24847 || rs6000_tune == PROCESSOR_POWER4
24848 || rs6000_tune == PROCESSOR_POWER5
24849 || rs6000_tune == PROCESSOR_POWER7
24850 || rs6000_tune == PROCESSOR_POWER8
24851 || rs6000_tune == PROCESSOR_POWER9
24852 || rs6000_tune == PROCESSOR_FUTURE
24853 || rs6000_tune == PROCESSOR_CELL)
24854 && recog_memoized (dep_insn)
24855 && (INSN_CODE (dep_insn) >= 0))
/* Only record-form (dot) compares and CR-logical ops set the CR field
   the branch reads, so only those get the extra spacing.  */
24857 switch (get_attr_type (dep_insn))
24860 case TYPE_FPCOMPARE:
24861 case TYPE_CR_LOGICAL:
24865 if (get_attr_dot (dep_insn) == DOT_YES)
24870 if (get_attr_dot (dep_insn) == DOT_YES
24871 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
/* Power6: model store-address-generation bypass restrictions.  */
24882 if ((rs6000_tune == PROCESSOR_POWER6)
24883 && recog_memoized (dep_insn)
24884 && (INSN_CODE (dep_insn) >= 0))
24887 if (GET_CODE (PATTERN (insn)) != SET)
24888 /* If this happens, we have to extend this to schedule
24889 optimally. Return default for now. */
24892 /* Adjust the cost for the case where the value written
24893 by a fixed point operation is used as the address
24894 gen value on a store. */
24895 switch (get_attr_type (dep_insn))
24900 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24901 return get_attr_sign_extend (dep_insn)
24902 == SIGN_EXTEND_YES ? 6 : 4;
24907 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24908 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24918 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24926 if (get_attr_update (dep_insn) == UPDATE_YES
24927 && ! rs6000_store_data_bypass_p (dep_insn, insn))
24933 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24939 if (! rs6000_store_data_bypass_p (dep_insn, insn))
/* Divide latency depends on operand width on Power6.  */
24940 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
/* Power6: same idea for values feeding a load's address generation.  */
24950 if ((rs6000_tune == PROCESSOR_POWER6)
24951 && recog_memoized (dep_insn)
24952 && (INSN_CODE (dep_insn) >= 0))
24955 /* Adjust the cost for the case where the value written
24956 by a fixed point instruction is used within the address
24957 gen portion of a subsequent load(u)(x) */
24958 switch (get_attr_type (dep_insn))
24963 if (set_to_load_agen (dep_insn, insn))
24964 return get_attr_sign_extend (dep_insn)
24965 == SIGN_EXTEND_YES ? 6 : 4;
24970 if (set_to_load_agen (dep_insn, insn))
24971 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24981 if (set_to_load_agen (dep_insn, insn))
24989 if (get_attr_update (dep_insn) == UPDATE_YES
24990 && set_to_load_agen (dep_insn, insn))
24996 if (set_to_load_agen (dep_insn, insn))
25002 if (set_to_load_agen (dep_insn, insn))
25003 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
/* Power6: non-update FP ops consuming an FPR loaded from a GPR move.  */
25013 if ((rs6000_tune == PROCESSOR_POWER6)
25014 && get_attr_update (insn) == UPDATE_NO
25015 && recog_memoized (dep_insn)
25016 && (INSN_CODE (dep_insn) >= 0)
25017 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
25024 /* Fall out to return default cost. */
25028 case REG_DEP_OUTPUT:
25029 /* Output dependency; DEP_INSN writes a register that INSN writes some
25031 if ((rs6000_tune == PROCESSOR_POWER6)
25032 && recog_memoized (dep_insn)
25033 && (INSN_CODE (dep_insn) >= 0))
25035 attr_type = get_attr_type (insn);
25040 case TYPE_FPSIMPLE:
25041 if (get_attr_type (dep_insn) == TYPE_FP
25042 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
25046 if (get_attr_update (insn) == UPDATE_NO
25047 && get_attr_type (dep_insn) == TYPE_MFFGPR)
25054 /* Fall through, no cost for output dependency. */
25058 /* Anti dependency; DEP_INSN reads a register that INSN writes some
25063 gcc_unreachable ();
25069 /* Debug version of rs6000_adjust_cost. */
/* Same contract as rs6000_adjust_cost, plus a stderr-style trace of the
   dependency kind and cost adjustment.  NOTE(review): the `const char *dep'
   declaration, switch header, fprintf target, and `return ret;' are not
   visible in this extract.  Also note the debug strings below misspell
   "dependency" as "depencency" — harmless (debug output only) but worth
   fixing when this function can be edited in full.  */
25072 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
25073 int cost, unsigned int dw)
25075 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
25083 default: dep = "unknown depencency"; break;
25084 case REG_DEP_TRUE: dep = "data dependency"; break;
25085 case REG_DEP_OUTPUT: dep = "output dependency"; break;
25086 case REG_DEP_ANTI: dep = "anti depencency"; break;
25090 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
25091 "%s, insn:\n", ret, cost, dep);
25099 /* The function returns a true if INSN is microcoded.
25100 Return false otherwise. */
/* NOTE(review): the `return false' lines and closing braces are missing
   from this extract.  */
25103 is_microcoded_insn (rtx_insn *insn)
/* Marker insns (USE/CLOBBER) and non-insns are never microcoded.  */
25105 if (!insn || !NONDEBUG_INSN_P (insn)
25106 || GET_CODE (PATTERN (insn)) == USE
25107 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Cell exposes this directly through an insn attribute.  */
25110 if (rs6000_tune == PROCESSOR_CELL)
25111 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
25113 if (rs6000_sched_groups
25114 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25116 enum attr_type type = get_attr_type (insn);
/* Power4/5 microcoded forms: algebraic load-with-update, indexed
   load/store-with-update, and mfcr.  */
25117 if ((type == TYPE_LOAD
25118 && get_attr_update (insn) == UPDATE_YES
25119 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25120 || ((type == TYPE_LOAD || type == TYPE_STORE)
25121 && get_attr_update (insn) == UPDATE_YES
25122 && get_attr_indexed (insn) == INDEXED_YES)
25123 || type == TYPE_MFCR)
25130 /* The function returns true if INSN is cracked into 2 instructions
25131 by the processor (and therefore occupies 2 issue slots). */
/* NOTE(review): the `return false'/`return true' lines and closing braces
   are missing from this extract.  */
25134 is_cracked_insn (rtx_insn *insn)
25136 if (!insn || !NONDEBUG_INSN_P (insn)
25137 || GET_CODE (PATTERN (insn)) == USE
25138 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* Cracking is only modeled for the group-dispatch Power4/5 cores.  */
25141 if (rs6000_sched_groups
25142 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25144 enum attr_type type = get_attr_type (insn);
/* The long disjunction below enumerates the known 2-slot forms:
   algebraic loads, non-indexed update loads/stores, FP update
   load/stores, 3-operand CR-logicals, record-form extends/shifts/
   multiplies, divides, and 32-bit inserts.  */
25145 if ((type == TYPE_LOAD
25146 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
25147 && get_attr_update (insn) == UPDATE_NO)
25148 || (type == TYPE_LOAD
25149 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
25150 && get_attr_update (insn) == UPDATE_YES
25151 && get_attr_indexed (insn) == INDEXED_NO)
25152 || (type == TYPE_STORE
25153 && get_attr_update (insn) == UPDATE_YES
25154 && get_attr_indexed (insn) == INDEXED_NO)
25155 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
25156 && get_attr_update (insn) == UPDATE_YES)
25157 || (type == TYPE_CR_LOGICAL
25158 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
25159 || (type == TYPE_EXTS
25160 && get_attr_dot (insn) == DOT_YES)
25161 || (type == TYPE_SHIFT
25162 && get_attr_dot (insn) == DOT_YES
25163 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
25164 || (type == TYPE_MUL
25165 && get_attr_dot (insn) == DOT_YES)
25166 || type == TYPE_DIV
25167 || (type == TYPE_INSERT
25168 && get_attr_size (insn) == SIZE_32))
25175 /* The function returns true if INSN can be issued only from
25176 the branch slot. */
/* NOTE(review): excerpt elides braces/returns.  Relevant only when
   rs6000_sched_groups is set; branches and jumps-through-register
   occupy the dedicated branch issue slot.  */
25179 is_branch_slot_insn (rtx_insn *insn)
25181 if (!insn || !NONDEBUG_INSN_P (insn)
25182 || GET_CODE (PATTERN (insn)) == USE
25183 || GET_CODE (PATTERN (insn)) == CLOBBER)
25186 if (rs6000_sched_groups)
25188 enum attr_type type = get_attr_type (insn);
25189 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
25197 /* The function returns true if out_inst sets a value that is
25198 used in the address generation computation of in_insn */
/* NOTE(review): excerpt elides the early-return checks on the
   single_set results; as visible, the answer is whether OUT_INSN's
   destination register is mentioned in IN_INSN's source (the load's
   address computation).  */
25200 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
25202 rtx out_set, in_set;
25204 /* For performance reasons, only handle the simple case where
25205 both loads are a single_set. */
25206 out_set = single_set (out_insn);
25209 in_set = single_set (in_insn);
25211 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
25217 /* Try to determine base/offset/size parts of the given MEM.
25218 Return true if successful, false if all the values couldn't
25221 This function only looks for REG or REG+CONST address forms.
25222 REG+REG address form will return false. */
/* NOTE(review): excerpt elides lines, including the final returns and
   the *offset initialization.  Line 25229 reads "if MEM_SIZE_KNOWN_P
   (mem)" with the condition parentheses apparently lost in extraction
   — restore "if (MEM_SIZE_KNOWN_P (mem))" from upstream.  */
25225 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
25226 HOST_WIDE_INT *size)
25229 if MEM_SIZE_KNOWN_P (mem)
25230 *size = MEM_SIZE (mem);
25234 addr_rtx = (XEXP (mem, 0));
/* For pre-modify addresses, look at the updated-address expression.  */
25235 if (GET_CODE (addr_rtx) == PRE_MODIFY)
25236 addr_rtx = XEXP (addr_rtx, 1);
/* Peel off nested REG+CONST layers, accumulating the total offset.  */
25239 while (GET_CODE (addr_rtx) == PLUS
25240 && CONST_INT_P (XEXP (addr_rtx, 1)))
25242 *offset += INTVAL (XEXP (addr_rtx, 1));
25243 addr_rtx = XEXP (addr_rtx, 0);
25245 if (!REG_P (addr_rtx))
25252 /* The function returns true if the target storage location of
25253 mem1 is adjacent to the target storage location of mem2 */
25254 /* Return 1 if memory locations are adjacent. */
/* NOTE(review): "®1"/"®2" below are mojibake for "&reg1"/"&reg2"
   (the "&reg;" HTML entity was collapsed during extraction); the local
   declarations of reg1/reg2 and the final return are also elided.
   Adjacency = same base register and one access ends exactly where the
   other begins.  */
25257 adjacent_mem_locations (rtx mem1, rtx mem2)
25260 HOST_WIDE_INT off1, size1, off2, size2;
25262 if (get_memref_parts (mem1, ®1, &off1, &size1)
25263 && get_memref_parts (mem2, ®2, &off2, &size2))
25264 return ((REGNO (reg1) == REGNO (reg2))
25265 && ((off1 + size1 == off2)
25266 || (off2 + size2 == off1)));
25271 /* This function returns true if it can be determined that the two MEM
25272 locations overlap by at least 1 byte based on base reg/offset/size. */
/* NOTE(review): same mojibake as adjacent_mem_locations — "®1"/"®2"
   should read "&reg1"/"&reg2"; reg1/reg2 declarations and the fallback
   return are elided.  Overlap test is the standard half-open interval
   intersection on [off, off+size).  */
25275 mem_locations_overlap (rtx mem1, rtx mem2)
25278 HOST_WIDE_INT off1, size1, off2, size2;
25280 if (get_memref_parts (mem1, ®1, &off1, &size1)
25281 && get_memref_parts (mem2, ®2, &off2, &size2))
25282 return ((REGNO (reg1) == REGNO (reg2))
25283 && (((off1 <= off2) && (off1 + size1 > off2))
25284 || ((off2 <= off1) && (off2 + size2 > off1))));
25289 /* A C statement (sans semicolon) to update the integer scheduling
25290 priority INSN_PRIORITY (INSN). Increase the priority to execute the
25291 INSN earlier, reduce the priority to execute INSN later. Do not
25292 define this macro if you do not need to adjust the scheduling
25293 priorities of insns. */
/* NOTE(review): excerpt elides the switch-case bodies (PPC750 type
   dispatch and the priority arithmetic around 25322) and several
   returns; visible structure only is documented here.  */
25296 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
25298 rtx load_mem, str_mem;
25299 /* On machines (like the 750) which have asymmetric integer units,
25300 where one integer unit can do multiply and divides and the other
25301 can't, reduce the priority of multiply/divide so it is scheduled
25302 before other integer operations. */
25305 if (! INSN_P (insn))
25308 if (GET_CODE (PATTERN (insn)) == USE)
25311 switch (rs6000_tune) {
25312 case PROCESSOR_PPC750:
25313 switch (get_attr_type (insn))
25320 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
25321 priority, priority);
25322 if (priority >= 0 && priority < 0x01000000)
/* Dispatch-group-restricted insns (must lead a group) can be boosted
   so slot constraints are honored by ready-list sorting.  */
25329 if (insn_must_be_first_in_group (insn)
25330 && reload_completed
25331 && current_sched_info->sched_max_insns_priority
25332 && rs6000_sched_restricted_insns_priority)
25335 /* Prioritize insns that can be dispatched only in the first
25337 if (rs6000_sched_restricted_insns_priority == 1)
25338 /* Attach highest priority to insn. This means that in
25339 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
25340 precede 'priority' (critical path) considerations. */
25341 return current_sched_info->sched_max_insns_priority;
25342 else if (rs6000_sched_restricted_insns_priority == 2)
25343 /* Increase priority of insn by a minimal amount. This means that in
25344 haifa-sched.c:ready_sort(), only 'priority' (critical path)
25345 considerations precede dispatch-slot restriction considerations. */
25346 return (priority + 1);
/* Power6 alternates loads and stores: after two stores boost a load,
   after two loads boost a store (see load_store_pendulum).  */
25349 if (rs6000_tune == PROCESSOR_POWER6
25350 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
25351 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
25352 /* Attach highest priority to insn if the scheduler has just issued two
25353 stores and this instruction is a load, or two loads and this instruction
25354 is a store. Power6 wants loads and stores scheduled alternately
25356 return current_sched_info->sched_max_insns_priority;
25361 /* Return true if the instruction is nonpipelined on the Cell. */
/* NOTE(review): excerpt elides braces/returns.  Multiplies, divides,
   square roots and the CR/JMPR move-from insns listed here are not
   pipelined on Cell.  */
25363 is_nonpipeline_insn (rtx_insn *insn)
25365 enum attr_type type;
25366 if (!insn || !NONDEBUG_INSN_P (insn)
25367 || GET_CODE (PATTERN (insn)) == USE
25368 || GET_CODE (PATTERN (insn)) == CLOBBER)
25371 type = get_attr_type (insn);
25372 if (type == TYPE_MUL
25373 || type == TYPE_DIV
25374 || type == TYPE_SDIV
25375 || type == TYPE_DDIV
25376 || type == TYPE_SSQRT
25377 || type == TYPE_DSQRT
25378 || type == TYPE_MFCR
25379 || type == TYPE_MFCRF
25380 || type == TYPE_MFJMPR)
25388 /* Return how many instructions the machine can issue per cycle. */
/* NOTE(review): the per-group "return N;" lines between the case
   labels are elided in this excerpt, so the actual rates per processor
   group cannot be confirmed from what is visible here.  */
25391 rs6000_issue_rate (void)
25393 /* Unless scheduling for register pressure, use issue rate of 1 for
25394 first scheduling pass to decrease degradation. */
25395 if (!reload_completed && !flag_sched_pressure)
25398 switch (rs6000_tune) {
25399 case PROCESSOR_RS64A:
25400 case PROCESSOR_PPC601: /* ? */
25401 case PROCESSOR_PPC7450:
25403 case PROCESSOR_PPC440:
25404 case PROCESSOR_PPC603:
25405 case PROCESSOR_PPC750:
25406 case PROCESSOR_PPC7400:
25407 case PROCESSOR_PPC8540:
25408 case PROCESSOR_PPC8548:
25409 case PROCESSOR_CELL:
25410 case PROCESSOR_PPCE300C2:
25411 case PROCESSOR_PPCE300C3:
25412 case PROCESSOR_PPCE500MC:
25413 case PROCESSOR_PPCE500MC64:
25414 case PROCESSOR_PPCE5500:
25415 case PROCESSOR_PPCE6500:
25416 case PROCESSOR_TITAN:
25418 case PROCESSOR_PPC476:
25419 case PROCESSOR_PPC604:
25420 case PROCESSOR_PPC604e:
25421 case PROCESSOR_PPC620:
25422 case PROCESSOR_PPC630:
25424 case PROCESSOR_POWER4:
25425 case PROCESSOR_POWER5:
25426 case PROCESSOR_POWER6:
25427 case PROCESSOR_POWER7:
25429 case PROCESSOR_POWER8:
25431 case PROCESSOR_POWER9:
25432 case PROCESSOR_FUTURE:
25439 /* Return how many instructions to look ahead for better insn
/* NOTE(review): the returns for the PPC8540/8548 case and the default
   case are elided.  Visible: Cell looks ahead 8 insns after reload,
   0 before.  */
25443 rs6000_use_sched_lookahead (void)
25445 switch (rs6000_tune)
25447 case PROCESSOR_PPC8540:
25448 case PROCESSOR_PPC8548:
25451 case PROCESSOR_CELL:
25452 return (reload_completed ? 8 : 0);
25459 /* We are choosing insn from the ready queue. Return zero if INSN can be
/* NOTE(review): return statements elided.  Visible intent: the head of
   the ready list is always eligible; on Cell, non-pipelined or
   microcoded insns (or any insn before reload) are rejected from
   lookahead selection.  */
25462 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
25464 if (ready_index == 0)
25467 if (rs6000_tune != PROCESSOR_CELL)
25470 gcc_assert (insn != NULL_RTX && INSN_P (insn));
25472 if (!reload_completed
25473 || is_nonpipeline_insn (insn)
25474 || is_microcoded_insn (insn))
25480 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
25481 and return true. */
/* NOTE(review): the MEM base case (around 25492-25497), local
   declarations of i/j/fmt and several returns are elided; only the
   recursive traversal over the rtx format string is visible.  */
25484 find_mem_ref (rtx pat, rtx *mem_ref)
25489 /* stack_tie does not produce any real memory traffic. */
25490 if (tie_operand (pat, VOIDmode))
25499 /* Recursively process the pattern. */
25500 fmt = GET_RTX_FORMAT (GET_CODE (pat));
25502 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
/* 'e' operands are single rtx children; 'E' operands are vectors.  */
25506 if (find_mem_ref (XEXP (pat, i), mem_ref))
25509 else if (fmt[i] == 'E')
25510 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
25512 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
25520 /* Determine if PAT is a PATTERN of a load insn. */
/* NOTE(review): braces/returns elided.  A SET is a load when its
   source contains a MEM; a PARALLEL is a load when any element is.
   The "!pat || pat == NULL_RTX" test is redundant as written (both
   arms check for null) — matches upstream, left as-is.  */
25523 is_load_insn1 (rtx pat, rtx *load_mem)
25525 if (!pat || pat == NULL_RTX)
25528 if (GET_CODE (pat) == SET)
25529 return find_mem_ref (SET_SRC (pat), load_mem);
25531 if (GET_CODE (pat) == PARALLEL)
25535 for (i = 0; i < XVECLEN (pat, 0); i++)
25536 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
25543 /* Determine if INSN loads from memory. */
/* NOTE(review): the early-return for the !INSN_P case (around
   25549-25553) is elided; the body delegates to is_load_insn1 on the
   insn's pattern, storing the MEM in *LOAD_MEM.  */
25546 is_load_insn (rtx insn, rtx *load_mem)
25548 if (!insn || !INSN_P (insn))
25554 return is_load_insn1 (PATTERN (insn), load_mem);
25557 /* Determine if PAT is a PATTERN of a store insn. */
/* NOTE(review): mirror image of is_load_insn1 — a SET is a store when
   its *destination* contains a MEM; braces/returns elided.  */
25560 is_store_insn1 (rtx pat, rtx *str_mem)
25562 if (!pat || pat == NULL_RTX)
25565 if (GET_CODE (pat) == SET)
25566 return find_mem_ref (SET_DEST (pat), str_mem);
25568 if (GET_CODE (pat) == PARALLEL)
25572 for (i = 0; i < XVECLEN (pat, 0); i++)
25573 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
25580 /* Determine if INSN stores to memory. */
/* NOTE(review): early-return body elided; delegates to is_store_insn1
   on the insn's pattern, storing the MEM in *STR_MEM.  */
25583 is_store_insn (rtx insn, rtx *str_mem)
25585 if (!insn || !INSN_P (insn))
25588 return is_store_insn1 (PATTERN (insn), str_mem);
25591 /* Return whether TYPE is a Power9 pairable vector instruction type. */
/* NOTE(review): the switch header, additional case labels (the jumps
   in line numbers suggest more cases around 25600-25604), the return
   statements and default are elided from this excerpt.  */
25594 is_power9_pairable_vec_type (enum attr_type type)
25598 case TYPE_VECSIMPLE:
25599 case TYPE_VECCOMPLEX:
25603 case TYPE_VECFLOAT:
25605 case TYPE_VECDOUBLE:
25613 /* Returns whether the dependence between INSN and NEXT is considered
25614 costly by the given target. */
/* NOTE(review): declarations of insn/next and the individual "return
   true/false;" lines are elided.  The four policies, from most
   aggressive to most conservative: no_dep_costly, store->load costly,
   true store->load with overlapping locations costly, latency
   threshold, all_deps_costly.  */
25617 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
25621 rtx load_mem, str_mem;
25623 /* If the flag is not enabled - no dependence is considered costly;
25624 allow all dependent insns in the same group.
25625 This is the most aggressive option. */
25626 if (rs6000_sched_costly_dep == no_dep_costly)
25629 /* If the flag is set to 1 - a dependence is always considered costly;
25630 do not allow dependent instructions in the same group.
25631 This is the most conservative option. */
25632 if (rs6000_sched_costly_dep == all_deps_costly)
25635 insn = DEP_PRO (dep);
25636 next = DEP_CON (dep);
25638 if (rs6000_sched_costly_dep == store_to_load_dep_costly
25639 && is_load_insn (next, &load_mem)
25640 && is_store_insn (insn, &str_mem))
25641 /* Prevent load after store in the same group. */
25644 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
25645 && is_load_insn (next, &load_mem)
25646 && is_store_insn (insn, &str_mem)
25647 && DEP_TYPE (dep) == REG_DEP_TRUE
25648 && mem_locations_overlap(str_mem, load_mem))
25649 /* Prevent load after store in the same group if it is a true
25653 /* The flag is set to X; dependences with latency >= X are considered costly,
25654 and will not be scheduled in the same group. */
25655 if (rs6000_sched_costly_dep <= max_dep_latency
25656 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
25662 /* Return the next insn after INSN that is found before TAIL is reached,
25663 skipping any "non-active" insns - insns that will not actually occupy
25664 an issue slot. Return NULL_RTX if such an insn is not found. */
/* NOTE(review): the loop header (while/do around 25672-25673), the
   first alternative of the condition at 25678 (likely LABEL_P/CALL_P),
   and the final return are elided.  An "active" insn is a jump, a jump
   table, or a real insn that is not USE/CLOBBER/stack_tie.  */
25667 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
25669 if (insn == NULL_RTX || insn == tail)
25674 insn = NEXT_INSN (insn);
25675 if (insn == NULL_RTX || insn == tail)
25679 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
25680 || (NONJUMP_INSN_P (insn)
25681 && GET_CODE (PATTERN (insn)) != USE
25682 && GET_CODE (PATTERN (insn)) != CLOBBER
25683 && INSN_CODE (insn) != CODE_FOR_stack_tie))
25689 /* Do Power9 specific sched_reorder2 reordering of ready list. */
/* NOTE(review): many lines are elided (scan-loop headers, the
   vec_pairing state assignments, pos/i/tmp declarations, braces).
   Visible strategy: (1) pair fixed-point divides back to back;
   (2) pair a vector insn with a vecload (or failing that, two of the
   same kind) so they dispatch to the same superslice; ready[] moves
   rotate the chosen insn to the end of the list, which the scheduler
   picks next.  */
25692 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
25697 enum attr_type type, type2;
25699 type = get_attr_type (last_scheduled_insn);
25701 /* Try to issue fixed point divides back-to-back in pairs so they will be
25702 routed to separate execution units and execute in parallel. */
25703 if (type == TYPE_DIV && divide_cnt == 0)
25705 /* First divide has been scheduled. */
25708 /* Scan the ready list looking for another divide, if found move it
25709 to the end of the list so it is chosen next. */
25713 if (recog_memoized (ready[pos]) >= 0
25714 && get_attr_type (ready[pos]) == TYPE_DIV)
25717 for (i = pos; i < lastpos; i++)
25718 ready[i] = ready[i + 1];
25719 ready[lastpos] = tmp;
25727 /* Last insn was the 2nd divide or not a divide, reset the counter. */
25730 /* The best dispatch throughput for vector and vector load insns can be
25731 achieved by interleaving a vector and vector load such that they'll
25732 dispatch to the same superslice. If this pairing cannot be achieved
25733 then it is best to pair vector insns together and vector load insns
25736 To aid in this pairing, vec_pairing maintains the current state with
25737 the following values:
25739 0 : Initial state, no vecload/vector pairing has been started.
25741 1 : A vecload or vector insn has been issued and a candidate for
25742 pairing has been found and moved to the end of the ready
25744 if (type == TYPE_VECLOAD)
25746 /* Issued a vecload. */
25747 if (vec_pairing == 0)
25749 int vecload_pos = -1;
25750 /* We issued a single vecload, look for a vector insn to pair it
25751 with. If one isn't found, try to pair another vecload. */
25755 if (recog_memoized (ready[pos]) >= 0)
25757 type2 = get_attr_type (ready[pos]);
25758 if (is_power9_pairable_vec_type (type2))
25760 /* Found a vector insn to pair with, move it to the
25761 end of the ready list so it is scheduled next. */
25763 for (i = pos; i < lastpos; i++)
25764 ready[i] = ready[i + 1];
25765 ready[lastpos] = tmp;
25767 return cached_can_issue_more;
25769 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
25770 /* Remember position of first vecload seen. */
25775 if (vecload_pos >= 0)
25777 /* Didn't find a vector to pair with but did find a vecload,
25778 move it to the end of the ready list. */
25779 tmp = ready[vecload_pos];
25780 for (i = vecload_pos; i < lastpos; i++)
25781 ready[i] = ready[i + 1];
25782 ready[lastpos] = tmp;
25784 return cached_can_issue_more;
25788 else if (is_power9_pairable_vec_type (type))
25790 /* Issued a vector operation. */
25791 if (vec_pairing == 0)
25794 /* We issued a single vector insn, look for a vecload to pair it
25795 with. If one isn't found, try to pair another vector. */
25799 if (recog_memoized (ready[pos]) >= 0)
25801 type2 = get_attr_type (ready[pos]);
25802 if (type2 == TYPE_VECLOAD)
25804 /* Found a vecload insn to pair with, move it to the
25805 end of the ready list so it is scheduled next. */
25807 for (i = pos; i < lastpos; i++)
25808 ready[i] = ready[i + 1];
25809 ready[lastpos] = tmp;
25811 return cached_can_issue_more;
25813 else if (is_power9_pairable_vec_type (type2)
25815 /* Remember position of first vector insn seen. */
25822 /* Didn't find a vecload to pair with but did find a vector
25823 insn, move it to the end of the ready list. */
25824 tmp = ready[vec_pos];
25825 for (i = vec_pos; i < lastpos; i++)
25826 ready[i] = ready[i + 1];
25827 ready[lastpos] = tmp;
25829 return cached_can_issue_more;
25834 /* We've either finished a vec/vecload pair, couldn't find an insn to
25835 continue the current pair, or the last insn had nothing to do with
25836 with pairing. In any case, reset the state. */
25840 return cached_can_issue_more;
25843 /* We are about to begin issuing insns for this clock cycle. */
/* NOTE(review): the "if (sched_verbose)" guard before the fprintf and
   the reorder braces are elided.  Visible behavior: on Cell, swap the
   last two ready entries when the tail entry is non-pipelined and the
   one before it is recognizable; on Power6, reset the load/store
   pendulum each cycle; always report the issue rate.  */
25846 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
25847 rtx_insn **ready ATTRIBUTE_UNUSED,
25848 int *pn_ready ATTRIBUTE_UNUSED,
25849 int clock_var ATTRIBUTE_UNUSED)
25851 int n_ready = *pn_ready;
25854 fprintf (dump, "// rs6000_sched_reorder :\n");
25856 /* Reorder the ready list, if the second to last ready insn
25857 is a nonepipeline insn. */
25858 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
25860 if (is_nonpipeline_insn (ready[n_ready - 1])
25861 && (recog_memoized (ready[n_ready - 2]) > 0))
25862 /* Simply swap first two insns. */
25863 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
25866 if (rs6000_tune == PROCESSOR_POWER6)
25867 load_store_pendulum = 0;
25869 return rs6000_issue_rate ();
25872 /* Like rs6000_sched_reorder, but called after issuing each insn. */
/* NOTE(review): scan-loop headers, pos/i/tmp/str_mem2 declarations,
   and many braces are elided.  The long comment below documents the
   Power6 load/store pendulum; the Power9 path delegates to
   power9_sched_reorder2.  */
25875 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
25876 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
25879 fprintf (dump, "// rs6000_sched_reorder2 :\n");
25881 /* For Power6, we need to handle some special cases to try and keep the
25882 store queue from overflowing and triggering expensive flushes.
25884 This code monitors how load and store instructions are being issued
25885 and skews the ready list one way or the other to increase the likelihood
25886 that a desired instruction is issued at the proper time.
25888 A couple of things are done. First, we maintain a "load_store_pendulum"
25889 to track the current state of load/store issue.
25891 - If the pendulum is at zero, then no loads or stores have been
25892 issued in the current cycle so we do nothing.
25894 - If the pendulum is 1, then a single load has been issued in this
25895 cycle and we attempt to locate another load in the ready list to
25898 - If the pendulum is -2, then two stores have already been
25899 issued in this cycle, so we increase the priority of the first load
25900 in the ready list to increase it's likelihood of being chosen first
25903 - If the pendulum is -1, then a single store has been issued in this
25904 cycle and we attempt to locate another store in the ready list to
25905 issue with it, preferring a store to an adjacent memory location to
25906 facilitate store pairing in the store queue.
25908 - If the pendulum is 2, then two loads have already been
25909 issued in this cycle, so we increase the priority of the first store
25910 in the ready list to increase it's likelihood of being chosen first
25913 - If the pendulum < -2 or > 2, then do nothing.
25915 Note: This code covers the most common scenarios. There exist non
25916 load/store instructions which make use of the LSU and which
25917 would need to be accounted for to strictly model the behavior
25918 of the machine. Those instructions are currently unaccounted
25919 for to help minimize compile time overhead of this code.
25921 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
25926 rtx load_mem, str_mem;
25928 if (is_store_insn (last_scheduled_insn, &str_mem))
25929 /* Issuing a store, swing the load_store_pendulum to the left */
25930 load_store_pendulum--;
25931 else if (is_load_insn (last_scheduled_insn, &load_mem))
25932 /* Issuing a load, swing the load_store_pendulum to the right */
25933 load_store_pendulum++;
25935 return cached_can_issue_more;
25937 /* If the pendulum is balanced, or there is only one instruction on
25938 the ready list, then all is well, so return. */
25939 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
25940 return cached_can_issue_more;
25942 if (load_store_pendulum == 1)
25944 /* A load has been issued in this cycle. Scan the ready list
25945 for another load to issue with it */
25950 if (is_load_insn (ready[pos], &load_mem))
25952 /* Found a load. Move it to the head of the ready list,
25953 and adjust it's priority so that it is more likely to
25956 for (i=pos; i<*pn_ready-1; i++)
25957 ready[i] = ready[i + 1];
25958 ready[*pn_ready-1] = tmp;
25960 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25961 INSN_PRIORITY (tmp)++;
25967 else if (load_store_pendulum == -2)
25969 /* Two stores have been issued in this cycle. Increase the
25970 priority of the first load in the ready list to favor it for
25971 issuing in the next cycle. */
25976 if (is_load_insn (ready[pos], &load_mem)
25978 && INSN_PRIORITY_KNOWN (ready[pos]))
25980 INSN_PRIORITY (ready[pos])++;
25982 /* Adjust the pendulum to account for the fact that a load
25983 was found and increased in priority. This is to prevent
25984 increasing the priority of multiple loads */
25985 load_store_pendulum--;
25992 else if (load_store_pendulum == -1)
25994 /* A store has been issued in this cycle. Scan the ready list for
25995 another store to issue with it, preferring a store to an adjacent
25997 int first_store_pos = -1;
26003 if (is_store_insn (ready[pos], &str_mem))
26006 /* Maintain the index of the first store found on the
26008 if (first_store_pos == -1)
26009 first_store_pos = pos;
26011 if (is_store_insn (last_scheduled_insn, &str_mem2)
26012 && adjacent_mem_locations (str_mem, str_mem2))
26014 /* Found an adjacent store. Move it to the head of the
26015 ready list, and adjust it's priority so that it is
26016 more likely to stay there */
26018 for (i=pos; i<*pn_ready-1; i++)
26019 ready[i] = ready[i + 1];
26020 ready[*pn_ready-1] = tmp;
26022 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
26023 INSN_PRIORITY (tmp)++;
26025 first_store_pos = -1;
26033 if (first_store_pos >= 0)
26035 /* An adjacent store wasn't found, but a non-adjacent store was,
26036 so move the non-adjacent store to the front of the ready
26037 list, and adjust its priority so that it is more likely to
26039 tmp = ready[first_store_pos];
26040 for (i=first_store_pos; i<*pn_ready-1; i++)
26041 ready[i] = ready[i + 1];
26042 ready[*pn_ready-1] = tmp;
26043 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
26044 INSN_PRIORITY (tmp)++;
26047 else if (load_store_pendulum == 2)
26049 /* Two loads have been issued in this cycle. Increase the priority
26050 of the first store in the ready list to favor it for issuing in
26056 if (is_store_insn (ready[pos], &str_mem)
26058 && INSN_PRIORITY_KNOWN (ready[pos]))
26060 INSN_PRIORITY (ready[pos])++;
26062 /* Adjust the pendulum to account for the fact that a store
26063 was found and increased in priority. This is to prevent
26064 increasing the priority of multiple stores */
26065 load_store_pendulum++;
26074 /* Do Power9 dependent reordering if necessary. */
26075 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
26076 && recog_memoized (last_scheduled_insn) >= 0)
26077 return power9_sched_reorder2 (ready, *pn_ready - 1);
26079 return cached_can_issue_more;
26082 /* Return whether the presence of INSN causes a dispatch group termination
26083 of group WHICH_GROUP.
26085 If WHICH_GROUP == current_group, this function will return true if INSN
26086 causes the termination of the current group (i.e, the dispatch group to
26087 which INSN belongs). This means that INSN will be the last insn in the
26088 group it belongs to.
26090 If WHICH_GROUP == previous_group, this function will return true if INSN
26091 causes the termination of the previous group (i.e, the dispatch group that
26092 precedes the group to which INSN belongs). This means that INSN will be
26093 the first insn in the group it belongs to). */
/* NOTE(review): local first/last declarations, the early null-insn
   check and the return expressions are elided from this excerpt.  */
26096 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
26103 first = insn_must_be_first_in_group (insn);
26104 last = insn_must_be_last_in_group (insn);
26109 if (which_group == current_group)
26111 else if (which_group == previous_group)
/* Return true if INSN must begin a new dispatch group on the current
   tuning target.  NOTE(review): this excerpt elides the leading
   comment/return type, most case labels between the visible attr-type
   cases, the "return true/false;" lines, and the closing default
   handling — treat the per-processor lists below as partial.  */
26119 insn_must_be_first_in_group (rtx_insn *insn)
26121 enum attr_type type;
26125 || DEBUG_INSN_P (insn)
26126 || GET_CODE (PATTERN (insn)) == USE
26127 || GET_CODE (PATTERN (insn)) == CLOBBER)
26130 switch (rs6000_tune)
26132 case PROCESSOR_POWER5:
26133 if (is_cracked_insn (insn))
/* POWER5 falls through into the POWER4 microcode check (fallthrough
   presumably intended — confirm against upstream).  */
26136 case PROCESSOR_POWER4:
26137 if (is_microcoded_insn (insn))
26140 if (!rs6000_sched_groups)
26143 type = get_attr_type (insn);
26150 case TYPE_CR_LOGICAL:
26163 case PROCESSOR_POWER6:
26164 type = get_attr_type (insn);
26173 case TYPE_FPCOMPARE:
26184 if (get_attr_dot (insn) == DOT_NO
26185 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26190 if (get_attr_size (insn) == SIZE_32)
26198 if (get_attr_update (insn) == UPDATE_YES)
26206 case PROCESSOR_POWER7:
26207 type = get_attr_type (insn);
26211 case TYPE_CR_LOGICAL:
26225 if (get_attr_dot (insn) == DOT_YES)
26230 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26231 || get_attr_update (insn) == UPDATE_YES)
26238 if (get_attr_update (insn) == UPDATE_YES)
26246 case PROCESSOR_POWER8:
26247 type = get_attr_type (insn);
26251 case TYPE_CR_LOGICAL:
26259 case TYPE_VECSTORE:
26266 if (get_attr_dot (insn) == DOT_YES)
26271 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26272 || get_attr_update (insn) == UPDATE_YES)
26277 if (get_attr_update (insn) == UPDATE_YES
26278 && get_attr_indexed (insn) == INDEXED_YES)
/* Return true if INSN must end its dispatch group on the current
   tuning target.  NOTE(review): leading comment/return type, many case
   labels, and all "return true/false;" lines are elided; the visible
   attr-type checks are a partial view of each processor's list.  */
26294 insn_must_be_last_in_group (rtx_insn *insn)
26296 enum attr_type type;
26300 || DEBUG_INSN_P (insn)
26301 || GET_CODE (PATTERN (insn)) == USE
26302 || GET_CODE (PATTERN (insn)) == CLOBBER)
26305 switch (rs6000_tune) {
26306 case PROCESSOR_POWER4:
26307 case PROCESSOR_POWER5:
26308 if (is_microcoded_insn (insn))
26311 if (is_branch_slot_insn (insn))
26315 case PROCESSOR_POWER6:
26316 type = get_attr_type (insn);
26324 case TYPE_FPCOMPARE:
26335 if (get_attr_dot (insn) == DOT_NO
26336 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26341 if (get_attr_size (insn) == SIZE_32)
26349 case PROCESSOR_POWER7:
26350 type = get_attr_type (insn);
26360 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26361 && get_attr_update (insn) == UPDATE_YES)
26366 if (get_attr_update (insn) == UPDATE_YES
26367 && get_attr_indexed (insn) == INDEXED_YES)
26375 case PROCESSOR_POWER8:
26376 type = get_attr_type (insn);
26388 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26389 && get_attr_update (insn) == UPDATE_YES)
26394 if (get_attr_update (insn) == UPDATE_YES
26395 && get_attr_indexed (insn) == INDEXED_YES)
26410 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
26411 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
/* NOTE(review): the null-insn skip inside the loop (around 26424-26427)
   and the return statements are elided.  GROUP_INSNS holds one slot
   per issue position; any costly resolved-forward dependence from a
   group member to NEXT_INSN makes the group costly.  */
26414 is_costly_group (rtx *group_insns, rtx next_insn)
26417 int issue_rate = rs6000_issue_rate ();
26419 for (i = 0; i < issue_rate; i++)
26421 sd_iterator_def sd_it;
26423 rtx insn = group_insns[i];
26428 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
26430 rtx next = DEP_CON (dep);
26432 if (next == next_insn
26433 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
26441 /* Utility of the function redefine_groups.
26442 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
26443 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
26444 to keep it "far" (in a separate group) from GROUP_INSNS, following
26445 one of the following schemes, depending on the value of the flag
26446 -minsert_sched_nops = X:
26447 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
26448 in order to force NEXT_INSN into a separate group.
26449 (2) X < sched_finish_regroup_exact: insert exactly X nops.
26450 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
26451 insertion (has a group just ended, how many vacant issue slots remain in the
26452 last group, and how many dispatch groups were encountered so far). */
/* NOTE(review): several lines elided — the nop/force/i declarations,
   parts of the nop-emission loops (including the gen_nop calls), the
   group_count increments, and some braces.  The two visible schemes
   correspond to the comment above.  */
26455 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
26456 rtx_insn *next_insn, bool *group_end, int can_issue_more,
26461 int issue_rate = rs6000_issue_rate ();
26462 bool end = *group_end;
26465 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
26466 return can_issue_more;
26468 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
26469 return can_issue_more;
26471 force = is_costly_group (group_insns, next_insn);
26473 return can_issue_more;
26475 if (sched_verbose > 6)
26476 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
26477 *group_count ,can_issue_more);
26479 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
26482 can_issue_more = 0;
26484 /* Since only a branch can be issued in the last issue_slot, it is
26485 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
26486 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
26487 in this case the last nop will start a new group and the branch
26488 will be forced to the new group. */
26489 if (can_issue_more && !is_branch_slot_insn (next_insn))
26492 /* Do we have a special group ending nop? */
26493 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
26494 || rs6000_tune == PROCESSOR_POWER8)
26496 nop = gen_group_ending_nop ();
26497 emit_insn_before (nop, next_insn);
26498 can_issue_more = 0;
26501 while (can_issue_more > 0)
26504 emit_insn_before (nop, next_insn);
26512 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
26514 int n_nops = rs6000_sched_insert_nops;
26516 /* Nops can't be issued from the branch slot, so the effective
26517 issue_rate for nops is 'issue_rate - 1'. */
26518 if (can_issue_more == 0)
26519 can_issue_more = issue_rate;
26521 if (can_issue_more == 0)
26523 can_issue_more = issue_rate - 1;
26526 for (i = 0; i < issue_rate; i++)
26528 group_insns[i] = 0;
26535 emit_insn_before (nop, next_insn);
26536 if (can_issue_more == issue_rate - 1) /* new group begins */
26539 if (can_issue_more == 0)
26541 can_issue_more = issue_rate - 1;
26544 for (i = 0; i < issue_rate; i++)
26546 group_insns[i] = 0;
26552 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
26555 /* Is next_insn going to start a new group? */
26558 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26559 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26560 || (can_issue_more < issue_rate &&
26561 insn_terminates_group_p (next_insn, previous_group)));
26562 if (*group_end && end)
26565 if (sched_verbose > 6)
26566 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
26567 *group_count, can_issue_more);
26568 return can_issue_more;
26571 return can_issue_more;
26574 /* This function tries to synch the dispatch groups that the compiler "sees"
26575 with the dispatch groups that the processor dispatcher is expected to
26576 form in practice. It tries to achieve this synchronization by forcing the
26577 estimated processor grouping on the compiler (as opposed to the function
26578 'pad_goups' which tries to force the scheduler's grouping on the processor).
26580 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
26581 examines the (estimated) dispatch groups that will be formed by the processor
26582 dispatcher. It marks these group boundaries to reflect the estimated
26583 processor grouping, overriding the grouping that the scheduler had marked.
26584 Depending on the value of the flag '-minsert-sched-nops' this function can
26585 force certain insns into separate groups or force a certain distance between
26586 them by inserting nops, for example, if there exists a "costly dependence"
26589 The function estimates the group boundaries that the processor will form as
26590 follows: It keeps track of how many vacant issue slots are available after
26591 each insn. A subsequent insn will start a new group if one of the following
26593 - no more vacant issue slots remain in the current dispatch group.
26594 - only the last issue slot, which is the branch slot, is vacant, but the next
26595 insn is not a branch.
26596 - only the last 2 or less issue slots, including the branch slot, are vacant,
26597 which means that a cracked insn (which occupies two issue slots) can't be
26598 issued in this group.
26599 - less than 'issue_rate' slots are vacant, and the next insn always needs to
26600 start a new group. */
/* NOTE(review): tail parameter, slot/group_end declarations, the
   group_count increment on group end, the group_insns reset guard, and
   the loop-advance "insn = next_insn;" are elided.  TImode on an insn
   marks "starts a new dispatch group" to later passes.  */
26603 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26606 rtx_insn *insn, *next_insn;
26608 int can_issue_more;
26611 int group_count = 0;
26615 issue_rate = rs6000_issue_rate ();
26616 group_insns = XALLOCAVEC (rtx, issue_rate);
26617 for (i = 0; i < issue_rate; i++)
26619 group_insns[i] = 0;
26621 can_issue_more = issue_rate;
26623 insn = get_next_active_insn (prev_head_insn, tail);
26626 while (insn != NULL_RTX)
26628 slot = (issue_rate - can_issue_more);
26629 group_insns[slot] = insn;
26631 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26632 if (insn_terminates_group_p (insn, current_group))
26633 can_issue_more = 0;
26635 next_insn = get_next_active_insn (insn, tail);
26636 if (next_insn == NULL_RTX)
26637 return group_count + 1;
26639 /* Is next_insn going to start a new group? */
26641 = (can_issue_more == 0
26642 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26643 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26644 || (can_issue_more < issue_rate &&
26645 insn_terminates_group_p (next_insn, previous_group)));
26647 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
26648 next_insn, &group_end, can_issue_more,
26654 can_issue_more = 0;
26655 for (i = 0; i < issue_rate; i++)
26657 group_insns[i] = 0;
/* Mark the group boundary via insn mode: TImode = group start.  */
26661 if (GET_MODE (next_insn) == TImode && can_issue_more)
26662 PUT_MODE (next_insn, VOIDmode);
26663 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
26664 PUT_MODE (next_insn, TImode);
26667 if (can_issue_more == 0)
26668 can_issue_more = issue_rate;
26671 return group_count;
26674 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
26675 dispatch group boundaries that the scheduler had marked. Pad with nops
26676 any dispatch groups which have vacant issue slots, in order to force the
26677 scheduler's grouping on the processor dispatcher. The function
26678 returns the number of dispatch groups found. */
26681 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26684 rtx_insn *insn, *next_insn;
26687 int can_issue_more;
26689 int group_count = 0;
26691 /* Initialize issue_rate. */
26692 issue_rate = rs6000_issue_rate ();
26693 can_issue_more = issue_rate;
26695 insn = get_next_active_insn (prev_head_insn, tail);
26696 next_insn = get_next_active_insn (insn, tail);
26698 while (insn != NULL_RTX)
26701 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26703 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
26705 if (next_insn == NULL_RTX)
26710 /* If the scheduler had marked group termination at this location
26711 (between insn and next_insn), and neither insn nor next_insn will
26712 force group termination, pad the group with nops to force group
26715 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
26716 && !insn_terminates_group_p (insn, current_group)
26717 && !insn_terminates_group_p (next_insn, previous_group))
26719 if (!is_branch_slot_insn (next_insn))
26722 while (can_issue_more)
26725 emit_insn_before (nop, next_insn);
26730 can_issue_more = issue_rate;
26735 next_insn = get_next_active_insn (insn, tail);
26738 return group_count;
26741 /* We're beginning a new block. Initialize data structures as necessary. */
26744 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
26745 int sched_verbose ATTRIBUTE_UNUSED,
26746 int max_ready ATTRIBUTE_UNUSED)
26748 last_scheduled_insn = NULL;
26749 load_store_pendulum = 0;
26754 /* The following function is called at the end of scheduling BB.
26755 After reload, it inserts nops at insn group bundling. */
26758 rs6000_sched_finish (FILE *dump, int sched_verbose)
26763 fprintf (dump, "=== Finishing schedule.\n");
26765 if (reload_completed && rs6000_sched_groups)
26767 /* Do not run sched_finish hook when selective scheduling enabled. */
26768 if (sel_sched_p ())
26771 if (rs6000_sched_insert_nops == sched_finish_none)
26774 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
26775 n_groups = pad_groups (dump, sched_verbose,
26776 current_sched_info->prev_head,
26777 current_sched_info->next_tail);
26779 n_groups = redefine_groups (dump, sched_verbose,
26780 current_sched_info->prev_head,
26781 current_sched_info->next_tail);
26783 if (sched_verbose >= 6)
26785 fprintf (dump, "ngroups = %d\n", n_groups);
26786 print_rtl (dump, current_sched_info->prev_head);
26787 fprintf (dump, "Done finish_sched\n");
26792 struct rs6000_sched_context
26794 short cached_can_issue_more;
26795 rtx_insn *last_scheduled_insn;
26796 int load_store_pendulum;
26801 typedef struct rs6000_sched_context rs6000_sched_context_def;
26802 typedef rs6000_sched_context_def *rs6000_sched_context_t;
26804 /* Allocate store for new scheduling context. */
26806 rs6000_alloc_sched_context (void)
26808 return xmalloc (sizeof (rs6000_sched_context_def));
26811 /* If CLEAN_P is true then initializes _SC with clean data,
26812 and from the global context otherwise. */
26814 rs6000_init_sched_context (void *_sc, bool clean_p)
26816 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26820 sc->cached_can_issue_more = 0;
26821 sc->last_scheduled_insn = NULL;
26822 sc->load_store_pendulum = 0;
26823 sc->divide_cnt = 0;
26824 sc->vec_pairing = 0;
26828 sc->cached_can_issue_more = cached_can_issue_more;
26829 sc->last_scheduled_insn = last_scheduled_insn;
26830 sc->load_store_pendulum = load_store_pendulum;
26831 sc->divide_cnt = divide_cnt;
26832 sc->vec_pairing = vec_pairing;
26836 /* Sets the global scheduling context to the one pointed to by _SC. */
26838 rs6000_set_sched_context (void *_sc)
26840 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26842 gcc_assert (sc != NULL);
26844 cached_can_issue_more = sc->cached_can_issue_more;
26845 last_scheduled_insn = sc->last_scheduled_insn;
26846 load_store_pendulum = sc->load_store_pendulum;
26847 divide_cnt = sc->divide_cnt;
26848 vec_pairing = sc->vec_pairing;
26853 rs6000_free_sched_context (void *_sc)
26855 gcc_assert (_sc != NULL);
26861 rs6000_sched_can_speculate_insn (rtx_insn *insn)
26863 switch (get_attr_type (insn))
26878 /* Length in units of the trampoline for entering a nested function. */
26881 rs6000_trampoline_size (void)
26885 switch (DEFAULT_ABI)
26888 gcc_unreachable ();
26891 ret = (TARGET_32BIT) ? 12 : 24;
26895 gcc_assert (!TARGET_32BIT);
26901 ret = (TARGET_32BIT) ? 40 : 48;
26908 /* Emit RTL insns to initialize the variable parts of a trampoline.
26909 FNADDR is an RTX for the address of the function's pure code.
26910 CXT is an RTX for the static chain value for the function. */
26913 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
26915 int regsize = (TARGET_32BIT) ? 4 : 8;
26916 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
26917 rtx ctx_reg = force_reg (Pmode, cxt);
26918 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
26920 switch (DEFAULT_ABI)
26923 gcc_unreachable ();
26925 /* Under AIX, just build the 3 word function descriptor */
26928 rtx fnmem, fn_reg, toc_reg;
26930 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
26931 error ("you cannot take the address of a nested function if you use "
26932 "the %qs option", "-mno-pointers-to-nested-functions");
26934 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
26935 fn_reg = gen_reg_rtx (Pmode);
26936 toc_reg = gen_reg_rtx (Pmode);
26938 /* Macro to shorten the code expansions below. */
26939 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
26941 m_tramp = replace_equiv_address (m_tramp, addr);
26943 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
26944 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
26945 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
26946 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
26947 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
26953 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
26957 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
26958 LCT_NORMAL, VOIDmode,
26960 GEN_INT (rs6000_trampoline_size ()), SImode,
26968 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
26969 identifier as an argument, so the front end shouldn't look it up. */
26972 rs6000_attribute_takes_identifier_p (const_tree attr_id)
26974 return is_attribute_p ("altivec", attr_id);
26977 /* Handle the "altivec" attribute. The attribute may have
26978 arguments as follows:
26980 __attribute__((altivec(vector__)))
26981 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
26982 __attribute__((altivec(bool__))) (always followed by 'unsigned')
26984 and may appear more than once (e.g., 'vector bool char') in a
26985 given declaration. */
26988 rs6000_handle_altivec_attribute (tree *node,
26989 tree name ATTRIBUTE_UNUSED,
26991 int flags ATTRIBUTE_UNUSED,
26992 bool *no_add_attrs)
26994 tree type = *node, result = NULL_TREE;
26998 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
26999 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
27000 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
27003 while (POINTER_TYPE_P (type)
27004 || TREE_CODE (type) == FUNCTION_TYPE
27005 || TREE_CODE (type) == METHOD_TYPE
27006 || TREE_CODE (type) == ARRAY_TYPE)
27007 type = TREE_TYPE (type);
27009 mode = TYPE_MODE (type);
27011 /* Check for invalid AltiVec type qualifiers. */
27012 if (type == long_double_type_node)
27013 error ("use of %<long double%> in AltiVec types is invalid");
27014 else if (type == boolean_type_node)
27015 error ("use of boolean types in AltiVec types is invalid");
27016 else if (TREE_CODE (type) == COMPLEX_TYPE)
27017 error ("use of %<complex%> in AltiVec types is invalid");
27018 else if (DECIMAL_FLOAT_MODE_P (mode))
27019 error ("use of decimal floating point types in AltiVec types is invalid");
27020 else if (!TARGET_VSX)
27022 if (type == long_unsigned_type_node || type == long_integer_type_node)
27025 error ("use of %<long%> in AltiVec types is invalid for "
27026 "64-bit code without %qs", "-mvsx");
27027 else if (rs6000_warn_altivec_long)
27028 warning (0, "use of %<long%> in AltiVec types is deprecated; "
27031 else if (type == long_long_unsigned_type_node
27032 || type == long_long_integer_type_node)
27033 error ("use of %<long long%> in AltiVec types is invalid without %qs",
27035 else if (type == double_type_node)
27036 error ("use of %<double%> in AltiVec types is invalid without %qs",
27040 switch (altivec_type)
27043 unsigned_p = TYPE_UNSIGNED (type);
27047 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
27050 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
27053 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
27056 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
27059 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
27061 case E_SFmode: result = V4SF_type_node; break;
27062 case E_DFmode: result = V2DF_type_node; break;
27063 /* If the user says 'vector int bool', we may be handed the 'bool'
27064 attribute _before_ the 'vector' attribute, and so select the
27065 proper type in the 'b' case below. */
27066 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
27067 case E_V2DImode: case E_V2DFmode:
27075 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
27076 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
27077 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
27078 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
27085 case E_V8HImode: result = pixel_V8HI_type_node;
27091 /* Propagate qualifiers attached to the element type
27092 onto the vector type. */
27093 if (result && result != type && TYPE_QUALS (type))
27094 result = build_qualified_type (result, TYPE_QUALS (type));
27096 *no_add_attrs = true; /* No need to hang on to the attribute. */
27099 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
27104 /* AltiVec defines five built-in scalar types that serve as vector
27105 elements; we must teach the compiler how to mangle them. The 128-bit
27106 floating point mangling is target-specific as well. */
27108 static const char *
27109 rs6000_mangle_type (const_tree type)
27111 type = TYPE_MAIN_VARIANT (type);
27113 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27114 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27117 if (type == bool_char_type_node) return "U6__boolc";
27118 if (type == bool_short_type_node) return "U6__bools";
27119 if (type == pixel_type_node) return "u7__pixel";
27120 if (type == bool_int_type_node) return "U6__booli";
27121 if (type == bool_long_long_type_node) return "U6__boolx";
27123 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
27125 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
27126 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
27128 /* For all other types, use the default mangling. */
27132 /* Handle a "longcall" or "shortcall" attribute; arguments as in
27133 struct attribute_spec.handler. */
27136 rs6000_handle_longcall_attribute (tree *node, tree name,
27137 tree args ATTRIBUTE_UNUSED,
27138 int flags ATTRIBUTE_UNUSED,
27139 bool *no_add_attrs)
27141 if (TREE_CODE (*node) != FUNCTION_TYPE
27142 && TREE_CODE (*node) != FIELD_DECL
27143 && TREE_CODE (*node) != TYPE_DECL)
27145 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27147 *no_add_attrs = true;
27153 /* Set longcall attributes on all functions declared when
27154 rs6000_default_long_calls is true. */
27156 rs6000_set_default_type_attributes (tree type)
27158 if (rs6000_default_long_calls
27159 && (TREE_CODE (type) == FUNCTION_TYPE
27160 || TREE_CODE (type) == METHOD_TYPE))
27161 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
27163 TYPE_ATTRIBUTES (type));
27166 darwin_set_default_type_attributes (type);
27170 /* Return a reference suitable for calling a function with the
27171 longcall attribute. */
27174 rs6000_longcall_ref (rtx call_ref, rtx arg)
27176 /* System V adds '.' to the internal name, so skip them. */
27177 const char *call_name = XSTR (call_ref, 0);
27178 if (*call_name == '.')
27180 while (*call_name == '.')
27183 tree node = get_identifier (call_name);
27184 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
27189 rtx base = const0_rtx;
27191 if (rs6000_pcrel_p (cfun))
27193 rtx reg = gen_rtx_REG (Pmode, regno);
27194 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27196 emit_insn (gen_rtx_SET (reg, u));
27200 if (DEFAULT_ABI == ABI_ELFv2)
27201 base = gen_rtx_REG (Pmode, TOC_REGISTER);
27205 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27208 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
27209 may be used by a function global entry point. For SysV4, r11
27210 is used by __glink_PLTresolve lazy resolver entry. */
27211 rtx reg = gen_rtx_REG (Pmode, regno);
27212 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27214 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
27216 emit_insn (gen_rtx_SET (reg, hi));
27217 emit_insn (gen_rtx_SET (reg, lo));
27221 return force_reg (Pmode, call_ref);
27224 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
27225 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
27228 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27229 struct attribute_spec.handler. */
27231 rs6000_handle_struct_attribute (tree *node, tree name,
27232 tree args ATTRIBUTE_UNUSED,
27233 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27236 if (DECL_P (*node))
27238 if (TREE_CODE (*node) == TYPE_DECL)
27239 type = &TREE_TYPE (*node);
27244 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27245 || TREE_CODE (*type) == UNION_TYPE)))
27247 warning (OPT_Wattributes, "%qE attribute ignored", name);
27248 *no_add_attrs = true;
27251 else if ((is_attribute_p ("ms_struct", name)
27252 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27253 || ((is_attribute_p ("gcc_struct", name)
27254 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27256 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27258 *no_add_attrs = true;
27265 rs6000_ms_bitfield_layout_p (const_tree record_type)
27267 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
27268 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27269 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27272 #ifdef USING_ELFOS_H
27274 /* A get_unnamed_section callback, used for switching to toc_section. */
27277 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27279 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27280 && TARGET_MINIMAL_TOC)
27282 if (!toc_initialized)
27284 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27285 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27286 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
27287 fprintf (asm_out_file, "\t.tc ");
27288 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
27289 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27290 fprintf (asm_out_file, "\n");
27292 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27293 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27294 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27295 fprintf (asm_out_file, " = .+32768\n");
27296 toc_initialized = 1;
27299 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27301 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27303 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27304 if (!toc_initialized)
27306 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27307 toc_initialized = 1;
27312 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27313 if (!toc_initialized)
27315 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27316 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27317 fprintf (asm_out_file, " = .+32768\n");
27318 toc_initialized = 1;
27323 /* Implement TARGET_ASM_INIT_SECTIONS. */
27326 rs6000_elf_asm_init_sections (void)
27329 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
27332 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
27333 SDATA2_SECTION_ASM_OP);
27336 /* Implement TARGET_SELECT_RTX_SECTION. */
27339 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
27340 unsigned HOST_WIDE_INT align)
27342 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
27343 return toc_section;
27345 return default_elf_select_rtx_section (mode, x, align);
27348 /* For a SYMBOL_REF, set generic flags and then perform some
27349 target-specific processing.
27351 When the AIX ABI is requested on a non-AIX system, replace the
27352 function name with the real name (with a leading .) rather than the
27353 function descriptor name. This saves a lot of overriding code to
27354 read the prefixes. */
27356 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
27358 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
27360 default_encode_section_info (decl, rtl, first);
27363 && TREE_CODE (decl) == FUNCTION_DECL
27365 && DEFAULT_ABI == ABI_AIX)
27367 rtx sym_ref = XEXP (rtl, 0);
27368 size_t len = strlen (XSTR (sym_ref, 0));
27369 char *str = XALLOCAVEC (char, len + 2);
27371 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
27372 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
27377 compare_section_name (const char *section, const char *templ)
27381 len = strlen (templ);
27382 return (strncmp (section, templ, len) == 0
27383 && (section[len] == 0 || section[len] == '.'));
27387 rs6000_elf_in_small_data_p (const_tree decl)
27389 if (rs6000_sdata == SDATA_NONE)
27392 /* We want to merge strings, so we never consider them small data. */
27393 if (TREE_CODE (decl) == STRING_CST)
27396 /* Functions are never in the small data area. */
27397 if (TREE_CODE (decl) == FUNCTION_DECL)
27400 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
27402 const char *section = DECL_SECTION_NAME (decl);
27403 if (compare_section_name (section, ".sdata")
27404 || compare_section_name (section, ".sdata2")
27405 || compare_section_name (section, ".gnu.linkonce.s")
27406 || compare_section_name (section, ".sbss")
27407 || compare_section_name (section, ".sbss2")
27408 || compare_section_name (section, ".gnu.linkonce.sb")
27409 || strcmp (section, ".PPC.EMB.sdata0") == 0
27410 || strcmp (section, ".PPC.EMB.sbss0") == 0)
27415 /* If we are told not to put readonly data in sdata, then don't. */
27416 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
27417 && !rs6000_readonly_in_sdata)
27420 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
27423 && size <= g_switch_value
27424 /* If it's not public, and we're not going to reference it there,
27425 there's no need to put it in the small data section. */
27426 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
27433 #endif /* USING_ELFOS_H */
27435 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
27438 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
27440 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
27443 /* Do not place thread-local symbols refs in the object blocks. */
27446 rs6000_use_blocks_for_decl_p (const_tree decl)
27448 return !DECL_THREAD_LOCAL_P (decl);
27451 /* Return a REG that occurs in ADDR with coefficient 1.
27452 ADDR can be effectively incremented by incrementing REG.
27454 r0 is special and we must not select it as an address
27455 register by this routine since our caller will try to
27456 increment the returned register via an "la" instruction. */
27459 find_addr_reg (rtx addr)
27461 while (GET_CODE (addr) == PLUS)
27463 if (REG_P (XEXP (addr, 0))
27464 && REGNO (XEXP (addr, 0)) != 0)
27465 addr = XEXP (addr, 0);
27466 else if (REG_P (XEXP (addr, 1))
27467 && REGNO (XEXP (addr, 1)) != 0)
27468 addr = XEXP (addr, 1);
27469 else if (CONSTANT_P (XEXP (addr, 0)))
27470 addr = XEXP (addr, 1);
27471 else if (CONSTANT_P (XEXP (addr, 1)))
27472 addr = XEXP (addr, 0);
27474 gcc_unreachable ();
27476 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
27481 rs6000_fatal_bad_address (rtx op)
27483 fatal_insn ("bad address", op);
27488 vec<branch_island, va_gc> *branch_islands;
27490 /* Remember to generate a branch island for far calls to the given
27494 add_compiler_branch_island (tree label_name, tree function_name,
27497 branch_island bi = {function_name, label_name, line_number};
27498 vec_safe_push (branch_islands, bi);
27501 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
27502 already there or not. */
27505 no_previous_def (tree function_name)
27510 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27511 if (function_name == bi->function_name)
27516 /* GET_PREV_LABEL gets the label name from the previous definition of
27520 get_prev_label (tree function_name)
27525 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27526 if (function_name == bi->function_name)
27527 return bi->label_name;
27531 /* Generate PIC and indirect symbol stubs. */
27534 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27536 unsigned int length;
27537 char *symbol_name, *lazy_ptr_name;
27538 char *local_label_0;
27539 static unsigned label = 0;
27541 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27542 symb = (*targetm.strip_name_encoding) (symb);
27545 length = strlen (symb);
27546 symbol_name = XALLOCAVEC (char, length + 32);
27547 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27549 lazy_ptr_name = XALLOCAVEC (char, length + 32);
27550 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
27553 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
27555 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
27559 fprintf (file, "\t.align 5\n");
27561 fprintf (file, "%s:\n", stub);
27562 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27565 local_label_0 = XALLOCAVEC (char, 16);
27566 sprintf (local_label_0, "L%u$spb", label);
27568 fprintf (file, "\tmflr r0\n");
27569 if (TARGET_LINK_STACK)
27572 get_ppc476_thunk_name (name);
27573 fprintf (file, "\tbl %s\n", name);
27574 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27578 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
27579 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27581 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
27582 lazy_ptr_name, local_label_0);
27583 fprintf (file, "\tmtlr r0\n");
27584 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
27585 (TARGET_64BIT ? "ldu" : "lwzu"),
27586 lazy_ptr_name, local_label_0);
27587 fprintf (file, "\tmtctr r12\n");
27588 fprintf (file, "\tbctr\n");
27592 fprintf (file, "\t.align 4\n");
27594 fprintf (file, "%s:\n", stub);
27595 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27597 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
27598 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
27599 (TARGET_64BIT ? "ldu" : "lwzu"),
27601 fprintf (file, "\tmtctr r12\n");
27602 fprintf (file, "\tbctr\n");
27605 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27606 fprintf (file, "%s:\n", lazy_ptr_name);
27607 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27608 fprintf (file, "%sdyld_stub_binding_helper\n",
27609 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
27612 /* Legitimize PIC addresses. If the address is already
27613 position-independent, we return ORIG. Newly generated
27614 position-independent addresses go into a reg. This is REG if non
27615 zero, otherwise we allocate register(s) as necessary. */
27617 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
27620 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
27625 if (reg == NULL && !reload_completed)
27626 reg = gen_reg_rtx (Pmode);
27628 if (GET_CODE (orig) == CONST)
27632 if (GET_CODE (XEXP (orig, 0)) == PLUS
27633 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
27636 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
27638 /* Use a different reg for the intermediate value, as
27639 it will be marked UNCHANGING. */
27640 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
27641 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
27644 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
27647 if (CONST_INT_P (offset))
27649 if (SMALL_INT (offset))
27650 return plus_constant (Pmode, base, INTVAL (offset));
27651 else if (!reload_completed)
27652 offset = force_reg (Pmode, offset);
27655 rtx mem = force_const_mem (Pmode, orig);
27656 return machopic_legitimize_pic_address (mem, Pmode, reg);
27659 return gen_rtx_PLUS (Pmode, base, offset);
27662 /* Fall back on generic machopic code. */
27663 return machopic_legitimize_pic_address (orig, mode, reg);
27666 /* Output a .machine directive for the Darwin assembler, and call
27667 the generic start_file routine. */
27670 rs6000_darwin_file_start (void)
27672 static const struct
27676 HOST_WIDE_INT if_set;
27678 { "ppc64", "ppc64", MASK_64BIT },
27679 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
27680 { "power4", "ppc970", 0 },
27681 { "G5", "ppc970", 0 },
27682 { "7450", "ppc7450", 0 },
27683 { "7400", "ppc7400", MASK_ALTIVEC },
27684 { "G4", "ppc7400", 0 },
27685 { "750", "ppc750", 0 },
27686 { "740", "ppc750", 0 },
27687 { "G3", "ppc750", 0 },
27688 { "604e", "ppc604e", 0 },
27689 { "604", "ppc604", 0 },
27690 { "603e", "ppc603", 0 },
27691 { "603", "ppc603", 0 },
27692 { "601", "ppc601", 0 },
27693 { NULL, "ppc", 0 } };
27694 const char *cpu_id = "";
27697 rs6000_file_start ();
27698 darwin_file_start ();
27700 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
27702 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
27703 cpu_id = rs6000_default_cpu;
27705 if (global_options_set.x_rs6000_cpu_index)
27706 cpu_id = processor_target_table[rs6000_cpu_index].name;
27708 /* Look through the mapping array. Pick the first name that either
27709 matches the argument, has a bit set in IF_SET that is also set
27710 in the target flags, or has a NULL name. */
27713 while (mapping[i].arg != NULL
27714 && strcmp (mapping[i].arg, cpu_id) != 0
27715 && (mapping[i].if_set & rs6000_isa_flags) == 0)
27718 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
27721 #endif /* TARGET_MACHO */
27725 rs6000_elf_reloc_rw_mask (void)
27729 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27735 /* Record an element in the table of global constructors. SYMBOL is
27736 a SYMBOL_REF of the function to be called; PRIORITY is a number
27737 between 0 and MAX_INIT_PRIORITY.
27739 This differs from default_named_section_asm_out_constructor in
27740 that we have special handling for -mrelocatable. */
27742 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
27744 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
27746 const char *section = ".ctors";
27749 if (priority != DEFAULT_INIT_PRIORITY)
27751 sprintf (buf, ".ctors.%.5u",
27752 /* Invert the numbering so the linker puts us in the proper
27753 order; constructors are run from right to left, and the
27754 linker sorts in increasing order. */
27755 MAX_INIT_PRIORITY - priority);
27759 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27760 assemble_align (POINTER_SIZE);
27762 if (DEFAULT_ABI == ABI_V4
27763 && (TARGET_RELOCATABLE || flag_pic > 1))
27765 fputs ("\t.long (", asm_out_file);
27766 output_addr_const (asm_out_file, symbol);
27767 fputs (")@fixup\n", asm_out_file);
27770 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27773 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
27775 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
27777 const char *section = ".dtors";
27780 if (priority != DEFAULT_INIT_PRIORITY)
27782 sprintf (buf, ".dtors.%.5u",
27783 /* Invert the numbering so the linker puts us in the proper
27784 order; constructors are run from right to left, and the
27785 linker sorts in increasing order. */
27786 MAX_INIT_PRIORITY - priority);
27790 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27791 assemble_align (POINTER_SIZE);
27793 if (DEFAULT_ABI == ABI_V4
27794 && (TARGET_RELOCATABLE || flag_pic > 1))
27796 fputs ("\t.long (", asm_out_file);
27797 output_addr_const (asm_out_file, symbol);
27798 fputs (")@fixup\n", asm_out_file);
27801 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27805 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
27807 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
27809 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
27810 ASM_OUTPUT_LABEL (file, name);
27811 fputs (DOUBLE_INT_ASM_OP, file);
27812 rs6000_output_function_entry (file, name);
27813 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
27816 fputs ("\t.size\t", file);
27817 assemble_name (file, name);
27818 fputs (",24\n\t.type\t.", file);
27819 assemble_name (file, name);
27820 fputs (",@function\n", file);
27821 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
27823 fputs ("\t.globl\t.", file);
27824 assemble_name (file, name);
27829 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27830 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27831 rs6000_output_function_entry (file, name);
27832 fputs (":\n", file);
27837 if (DEFAULT_ABI == ABI_V4
27838 && (TARGET_RELOCATABLE || flag_pic > 1)
27839 && !TARGET_SECURE_PLT
27840 && (!constant_pool_empty_p () || crtl->profile)
27841 && (uses_toc = uses_TOC ()))
27846 switch_to_other_text_partition ();
27847 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27849 fprintf (file, "\t.long ");
27850 assemble_name (file, toc_label_name);
27853 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27854 assemble_name (file, buf);
27857 switch_to_other_text_partition ();
27860 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27861 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27863 if (TARGET_CMODEL == CMODEL_LARGE
27864 && rs6000_global_entry_point_prologue_needed_p ())
27868 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27870 fprintf (file, "\t.quad .TOC.-");
27871 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27872 assemble_name (file, buf);
27876 if (DEFAULT_ABI == ABI_AIX)
27878 const char *desc_name, *orig_name;
27880 orig_name = (*targetm.strip_name_encoding) (name);
27881 desc_name = orig_name;
27882 while (*desc_name == '.')
27885 if (TREE_PUBLIC (decl))
27886 fprintf (file, "\t.globl %s\n", desc_name);
27888 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27889 fprintf (file, "%s:\n", desc_name);
27890 fprintf (file, "\t.long %s\n", orig_name);
27891 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
27892 fputs ("\t.long 0\n", file);
27893 fprintf (file, "\t.previous\n");
27895 ASM_OUTPUT_LABEL (file, name);
/* End-of-file hook for ELF targets: emit .gnu_attribute tags describing
   the floating-point / vector / struct-return ABI actually used, plus
   exec-stack and split-stack markers, and (when a CPU builtin was
   expanded) a reference to the TCB verification symbol.
   NOTE(review): this extract is non-contiguous — interior lines are
   missing (see gaps in the embedded original numbering), so the visible
   lines do not form a complete body.  Kept byte-identical.  */
27898 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
27900 rs6000_elf_file_end (void)
27902 #ifdef HAVE_AS_GNU_ATTRIBUTE
27903 /* ??? The value emitted depends on options active at file end.
27904 Assume anyone using #pragma or attributes that might change
27905 options knows what they are doing. */
27906 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
27907 && rs6000_passes_float)
27911 if (TARGET_HARD_FLOAT)
27915 if (rs6000_passes_long_double)
27917 	  if (!TARGET_LONG_DOUBLE_128)
27919 	  else if (TARGET_IEEEQUAD)
/* .gnu_attribute 4 encodes the FP ABI (hard/soft, long-double kind).  */
27924 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
27926 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
27928 if (rs6000_passes_vector)
/* Tag 8: vector ABI — 2 for AltiVec ABI, 1 otherwise.  */
27929 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
27930 (TARGET_ALTIVEC_ABI ? 2 : 1));
27931 if (rs6000_returns_struct)
/* Tag 12: small-struct return convention (AIX vs SVR4 style).  */
27932 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
27933 aix_struct_return ? 2 : 1);
27936 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27937 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
27938 file_end_indicate_exec_stack ();
27941 if (flag_split_stack)
27942 file_end_indicate_split_stack ();
27946 /* We have expanded a CPU builtin, so we need to emit a reference to
27947 the special symbol that LIBC uses to declare it supports the
27948 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
27949 switch_to_section (data_section);
27950 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
27951 fprintf (asm_out_file, "\t%s %s\n",
27952 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
27959 #ifndef HAVE_XCOFF_DWARF_EXTRAS
27960 #define HAVE_XCOFF_DWARF_EXTRAS 0
/* XCOFF assembly-output helpers: unwind-info selection, block anchors,
   .globl emission, and the get_unnamed_section callbacks that switch to
   the RO / RW / TLS csects and the TOC.
   NOTE(review): non-contiguous extract — interior lines (bodies, braces)
   are missing; kept byte-identical.  */
27963 static enum unwind_info_type
27964 rs6000_xcoff_debug_unwind_info (void)
/* Emit a section-anchor definition: NAME = "$ + offset" via SET_ASM_OP.  */
27970 rs6000_xcoff_asm_output_anchor (rtx symbol)
27974 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
27975 SYMBOL_REF_BLOCK_OFFSET (symbol));
27976 fprintf (asm_out_file, "%s", SET_ASM_OP);
27977 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
27978 fprintf (asm_out_file, ",");
27979 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
27980 fprintf (asm_out_file, "\n");
/* Emit a .globl directive for NAME.  */
27984 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
27986 fputs (GLOBAL_ASM_OP, stream);
27987 RS6000_OUTPUT_BASENAME (stream, name);
27988 putc ('\n', stream);
27991 /* A get_unnamed_decl callback, used for read-only sections. PTR
27992 points to the section string variable. */
27995 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
27997 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
27998 *(const char *const *) directive,
27999 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28002 /* Likewise for read-write sections. */
28005 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
28007 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
28008 *(const char *const *) directive,
28009 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
/* Likewise for TLS ([TL] storage-mapping class) sections.  */
28013 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
28015 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
28016 *(const char *const *) directive,
28017 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28020 /* A get_unnamed_section callback, used for switching to toc_section. */
28023 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28025 if (TARGET_MINIMAL_TOC)
28027 /* toc_section is always selected at least once from
28028 rs6000_xcoff_file_start, so this is guaranteed to
28029 always be defined once and only once in each file. */
28030 if (!toc_initialized)
28032 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
28033 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
28034 toc_initialized = 1;
/* Non-minimal-TOC path: 64-bit requests 8-byte (2^3) alignment.  */
28036 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
28037 (TARGET_32BIT ? "" : ",3"));
28040 fputs ("\t.toc\n", asm_out_file);
/* NOTE(review): non-contiguous extract — some assignments (e.g. the
   left-hand side of the tls_data_section and toc_section initializers)
   are on missing lines; kept byte-identical.  */
28043 /* Implement TARGET_ASM_INIT_SECTIONS. */
28046 rs6000_xcoff_asm_init_sections (void)
28048 read_only_data_section
28049 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28050 &xcoff_read_only_section_name);
28052 private_data_section
28053 = get_unnamed_section (SECTION_WRITE,
28054 rs6000_xcoff_output_readwrite_section_asm_op,
28055 &xcoff_private_data_section_name);
28057 read_only_private_data_section
28058 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28059 &xcoff_private_rodata_section_name);
28062 = get_unnamed_section (SECTION_TLS,
28063 rs6000_xcoff_output_tls_section_asm_op,
28064 &xcoff_tls_data_section_name);
28066 tls_private_data_section
28067 = get_unnamed_section (SECTION_TLS,
28068 rs6000_xcoff_output_tls_section_asm_op,
28069 &xcoff_private_data_section_name);
28072 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
28074 readonly_data_section = read_only_data_section;
/* TARGET_ASM_RELOC_RW_MASK: return value is on a missing line.  */
28078 rs6000_xcoff_reloc_rw_mask (void)
/* Implement TARGET_ASM_NAMED_SECTION: emit .dwsect for debug sections,
   otherwise a .csect with the storage-mapping-class suffix chosen from
   the FLAGS bits (PR/RO/RW/TL/XO) and the alignment from SECTION_ENTSIZE.  */
28084 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
28085 tree decl ATTRIBUTE_UNUSED)
28088 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
28090 if (flags & SECTION_EXCLUDE)
28092 else if (flags & SECTION_DEBUG)
28094 fprintf (asm_out_file, "\t.dwsect %s\n", name);
28097 else if (flags & SECTION_CODE)
28099 else if (flags & SECTION_TLS)
28101 else if (flags & SECTION_WRITE)
/* Code csects get a leading '.' on the name.  */
28106 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
28107 (flags & SECTION_CODE) ? "." : "",
28108 name, suffix[smclass], flags & SECTION_ENTSIZE);
/* Section-selection hooks for XCOFF.
   NOTE(review): non-contiguous extract — interior lines are missing;
   kept byte-identical.  */
28111 #define IN_NAMED_SECTION(DECL) \
28112 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
28113 && DECL_SECTION_NAME (DECL) != NULL)
/* Implement TARGET_ASM_SELECT_SECTION: pick RO / RW / TLS data section
   for DECL, public vs. file-private variants.  */
28116 rs6000_xcoff_select_section (tree decl, int reloc,
28117 unsigned HOST_WIDE_INT align)
28119 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
28121 if (align > BIGGEST_ALIGNMENT)
28123 resolve_unique_section (decl, reloc, true);
28124 if (IN_NAMED_SECTION (decl))
28125 return get_named_section (decl, NULL, reloc);
28128 if (decl_readonly_section (decl, reloc))
28130 if (TREE_PUBLIC (decl))
28131 return read_only_data_section;
28133 return read_only_private_data_section;
28138 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28140 if (TREE_PUBLIC (decl))
28141 return tls_data_section;
28142 else if (bss_initializer_p (decl))
28144 /* Convert to COMMON to emit in BSS. */
28145 DECL_COMMON (decl) = 1;
28146 return tls_comm_section;
28149 return tls_private_data_section;
28153 if (TREE_PUBLIC (decl))
28154 return data_section;
28156 return private_data_section;
/* Implement TARGET_ASM_UNIQUE_SECTION: give DECL its own section name
   unless the default select_section placement suffices.  */
28161 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
28165 /* Use select_section for private data and uninitialized data with
28166 alignment <= BIGGEST_ALIGNMENT. */
28167 if (!TREE_PUBLIC (decl)
28168 || DECL_COMMON (decl)
28169 || (DECL_INITIAL (decl) == NULL_TREE
28170 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
28171 || DECL_INITIAL (decl) == error_mark_node
28172 || (flag_zero_initialized_in_bss
28173 && initializer_zerop (DECL_INITIAL (decl))))
28176 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
28177 name = (*targetm.strip_name_encoding) (name);
28178 set_decl_section_name (decl, name);
28181 /* Select section for constant in constant pool.
28183 On RS/6000, all constants are in the private read-only data area.
28184 However, if this is being placed in the TOC it must be output as a
28188 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
28189 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
28191 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28192 return toc_section;
28194 return read_only_private_data_section;
/* NOTE(review): non-contiguous extract — interior lines are missing;
   kept byte-identical.  */
28197 /* Remove any trailing [DS] or the like from the symbol name. */
28199 static const char *
28200 rs6000_xcoff_strip_name_encoding (const char *name)
28205 len = strlen (name);
/* Strip a 4-character "[XX]" mapping-class suffix.  Assumes the suffix,
   when present, is always exactly 4 chars — TODO confirm for all classes.  */
28206 if (name[len - 1] == ']')
28207 return ggc_alloc_string (name, len - 4);
28212 /* Section attributes. AIX is always PIC. */
28214 static unsigned int
28215 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
28217 unsigned int align;
28218 unsigned int flags = default_section_type_flags (decl, name, reloc);
28220 /* Align to at least UNIT size. */
28221 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
28222 align = MIN_UNITS_PER_WORD;
28224 /* Increase alignment of large objects if not already stricter. */
28225 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
28226 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
28227 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
/* The log2 of the alignment is smuggled through the SECTION_ENTSIZE bits.  */
28229 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
/* NOTE(review): non-contiguous extract — interior lines are missing
   (including the trailing argument of the final fputs in file_end);
   kept byte-identical.  */
28232 /* Output at beginning of assembler file.
28234 Initialize the section names for the RS/6000 at this point.
28236 Specify filename, including full path, to assembler.
28238 We want to go into the TOC section so at least one .toc will be emitted.
28239 Also, in order to output proper .bs/.es pairs, we need at least one static
28240 [RW] section emitted.
28242 Finally, declare mcount when profiling to make the assembler happy. */
28245 rs6000_xcoff_file_start (void)
28247 rs6000_gen_section_name (&xcoff_bss_section_name,
28248 main_input_filename, ".bss_");
28249 rs6000_gen_section_name (&xcoff_private_data_section_name,
28250 main_input_filename, ".rw_");
28251 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
28252 main_input_filename, ".rop_");
28253 rs6000_gen_section_name (&xcoff_read_only_section_name,
28254 main_input_filename, ".ro_");
28255 rs6000_gen_section_name (&xcoff_tls_data_section_name,
28256 main_input_filename, ".tls_");
28257 rs6000_gen_section_name (&xcoff_tbss_section_name,
28258 main_input_filename, ".tbss_[UL]");
28260 fputs ("\t.file\t", asm_out_file);
28261 output_quoted_string (asm_out_file, main_input_filename);
28262 fputc ('\n', asm_out_file);
28263 if (write_symbols != NO_DEBUG)
28264 switch_to_section (private_data_section);
28265 switch_to_section (toc_section);
28266 switch_to_section (text_section);
28268 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
28269 rs6000_file_start ();
28272 /* Output at end of assembler file.
28273 On the RS/6000, referencing data should automatically pull in text. */
28276 rs6000_xcoff_file_end (void)
28278 switch_to_section (text_section);
28279 fputs ("_section_.text:\n", asm_out_file);
28280 switch_to_section (data_section);
28281 fputs (TARGET_32BIT
28282 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
/* Alias emission for the AIX assembler, where .set cannot be used:
   aliases are printed as alternative labels in front of the definition.
   NOTE(review): non-contiguous extract — interior lines (braces,
   underscore-substitution loop bodies) are missing; kept byte-identical.  */
28286 struct declare_alias_data
/* True when emitting the '.'-prefixed function-descriptor entry symbol.  */
28289 bool function_descriptor;
28292 /* Declare alias N. A helper function for for_node_and_aliases. */
28295 rs6000_declare_alias (struct symtab_node *n, void *d)
28297 struct declare_alias_data *data = (struct declare_alias_data *)d;
28298 /* Main symbol is output specially, because varasm machinery does part of
28299 the job for us - we do not need to declare .globl/lglobs and such. */
28300 if (!n->alias || n->weakref)
28303 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
28306 /* Prevent assemble_alias from trying to use .set pseudo operation
28307 that does not behave as expected by the middle-end. */
28308 TREE_ASM_WRITTEN (n->decl) = true;
28310 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
28311 char *buffer = (char *) alloca (strlen (name) + 2);
28313 int dollar_inside = 0;
/* Scan for '$' in the mangled name; '$' is not valid for the AIX
   assembler, so a .rename with the quoted original is emitted below.  */
28315 strcpy (buffer, name);
28316 p = strchr (buffer, '$');
28320 p = strchr (p + 1, '$');
28322 if (TREE_PUBLIC (n->decl))
28324 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
28326 if (dollar_inside) {
28327 if (data->function_descriptor)
28328 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28329 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28331 if (data->function_descriptor)
28333 fputs ("\t.globl .", data->file);
28334 RS6000_OUTPUT_BASENAME (data->file, buffer);
28335 putc ('\n', data->file);
28337 fputs ("\t.globl ", data->file);
28338 RS6000_OUTPUT_BASENAME (data->file, buffer);
28339 putc ('\n', data->file);
28341 #ifdef ASM_WEAKEN_DECL
28342 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
28343 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
/* Non-public path: same .rename handling, but .lglobl instead of .globl.  */
28350 if (data->function_descriptor)
28351 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28352 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28354 if (data->function_descriptor)
28356 fputs ("\t.lglobl .", data->file);
28357 RS6000_OUTPUT_BASENAME (data->file, buffer);
28358 putc ('\n', data->file);
28360 fputs ("\t.lglobl ", data->file);
28361 RS6000_OUTPUT_BASENAME (data->file, buffer);
28362 putc ('\n', data->file);
/* Finally emit the alias as a label (dot-prefixed for descriptors).  */
28364 if (data->function_descriptor)
28365 fputs (".", data->file);
28366 RS6000_OUTPUT_BASENAME (data->file, buffer);
28367 fputs (":\n", data->file);
28372 #ifdef HAVE_GAS_HIDDEN
28373 /* Helper function to calculate visibility of a DECL
28374 and return the value as a const string. */
28376 static const char *
28377 rs6000_xcoff_visibility (tree decl)
28379 static const char * const visibility_types[] = {
28380 "", ",protected", ",hidden", ",internal"
28383 enum symbol_visibility vis = DECL_VISIBILITY (decl);
28384 return visibility_types[vis];
/* NOTE(review): non-contiguous extract — interior lines are missing;
   kept byte-identical.  */
28389 /* This macro produces the initial definition of a function name.
28390 On the RS/6000, we need to place an extra '.' in the function name and
28391 output the function descriptor.
28392 Dollar signs are converted to underscores.
28394 The csect for the function will have already been created when
28395 text_section was selected. We do have to go back to that csect, however.
28397 The third and fourth parameters to the .function pseudo-op (16 and 044)
28398 are placeholders which no longer have any use.
28400 Because AIX assembler's .set command has unexpected semantics, we output
28401 all aliases as alternative labels in front of the definition. */
28404 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
28406 char *buffer = (char *) alloca (strlen (name) + 1);
28408 int dollar_inside = 0;
28409 struct declare_alias_data data = {file, false};
28411 strcpy (buffer, name);
28412 p = strchr (buffer, '$');
28416 p = strchr (p + 1, '$');
28418 if (TREE_PUBLIC (decl))
28420 if (!RS6000_WEAK || !DECL_WEAK (decl))
28422 if (dollar_inside) {
28423 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28424 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28426 fputs ("\t.globl .", file);
28427 RS6000_OUTPUT_BASENAME (file, buffer);
28428 #ifdef HAVE_GAS_HIDDEN
28429 fputs (rs6000_xcoff_visibility (decl), file);
/* Non-public function: .lglobl the entry symbol instead.  */
28436 if (dollar_inside) {
28437 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28438 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28440 fputs ("\t.lglobl .", file);
28441 RS6000_OUTPUT_BASENAME (file, buffer);
/* Function descriptor csect ([DS]); 64-bit requests 2^3 alignment.  */
28444 fputs ("\t.csect ", file);
28445 RS6000_OUTPUT_BASENAME (file, buffer);
28446 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
28447 RS6000_OUTPUT_BASENAME (file, buffer);
28448 fputs (":\n", file);
28449 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28451 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
28452 RS6000_OUTPUT_BASENAME (file, buffer);
28453 fputs (", TOC[tc0], 0\n", file);
/* Back to the text csect for the actual code label.  */
28455 switch_to_section (function_section (decl));
28457 RS6000_OUTPUT_BASENAME (file, buffer);
28458 fputs (":\n", file);
28459 data.function_descriptor = true;
28460 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28462 if (!DECL_IGNORED_P (decl))
28464 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28465 xcoffout_declare_function (file, decl, buffer);
28466 else if (write_symbols == DWARF2_DEBUG)
28468 name = (*targetm.strip_name_encoding) (name);
28469 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
28476 /* Output assembly language to globalize a symbol from a DECL,
28477 possibly with visibility. */
28480 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
28482 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
28483 fputs (GLOBAL_ASM_OP, stream);
28484 RS6000_OUTPUT_BASENAME (stream, name);
28485 #ifdef HAVE_GAS_HIDDEN
28486 fputs (rs6000_xcoff_visibility (decl), stream);
28488 putc ('\n', stream);
/* NOTE(review): non-contiguous extract — interior lines are missing;
   kept byte-identical.  */
28491 /* Output assembly language to define a symbol as COMMON from a DECL,
28492 possibly with visibility. */
28495 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
28496 tree decl ATTRIBUTE_UNUSED,
28498 unsigned HOST_WIDE_INT size,
28499 unsigned HOST_WIDE_INT align)
/* Default to 2^2 = 4-byte alignment; ALIGN is in bits, so convert.  */
28501 unsigned HOST_WIDE_INT align2 = 2;
28504 align2 = floor_log2 (align / BITS_PER_UNIT);
28508 fputs (COMMON_ASM_OP, stream);
28509 RS6000_OUTPUT_BASENAME (stream, name);
28512 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
28515 #ifdef HAVE_GAS_HIDDEN
28517 fputs (rs6000_xcoff_visibility (decl), stream);
28519 putc ('\n', stream);
28522 /* This macro produces the initial definition of a object (variable) name.
28523 Because AIX assembler's .set command has unexpected semantics, we output
28524 all aliases as alternative labels in front of the definition. */
28527 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
28529 struct declare_alias_data data = {file, false};
28530 RS6000_OUTPUT_BASENAME (file, name);
28531 fputs (":\n", file);
28532 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28536 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
28539 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
28541 fputs (integer_asm_op (size, FALSE), file);
28542 assemble_name (file, label);
28543 fputs ("-$", file);
28546 /* Output a symbol offset relative to the dbase for the current object.
28547 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
28550 __gcc_unwind_dbase is embedded in all executables/libraries through
28551 libgcc/config/rs6000/crtdbase.S. */
28554 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
28556 fputs (integer_asm_op (size, FALSE), file);
28557 assemble_name (file, label);
28558 fputs("-__gcc_unwind_dbase", file);
/* Encode XCOFF-specific info in symbol names and emit .weak directives.
   NOTE(review): non-contiguous extract — interior lines are missing;
   kept byte-identical.  */
28563 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
28567 const char *symname;
28569 default_encode_section_info (decl, rtl, first);
28571 /* Careful not to prod global register variables. */
28574 symbol = XEXP (rtl, 0);
28575 if (!SYMBOL_REF_P (symbol))
28578 flags = SYMBOL_REF_FLAGS (symbol);
/* TLS variables never get section-anchor block info.  */
28580 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28581 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
28583 SYMBOL_REF_FLAGS (symbol) = flags;
28585 /* Append mapping class to extern decls. */
28586 symname = XSTR (symbol, 0);
28587 if (decl /* sync condition with assemble_external () */
28588 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28589 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
28590 || TREE_CODE (decl) == FUNCTION_DECL)
28591 && symname[strlen (symname) - 1] != ']')
/* +5 = 4-char "[DS]"/"[UA]" suffix plus the NUL terminator.  */
28593 char *newname = (char *) alloca (strlen (symname) + 5);
28594 strcpy (newname, symname);
28595 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
28596 ? "[DS]" : "[UA]"));
28597 XSTR (symbol, 0) = ggc_strdup (newname);
28600 #endif /* HAVE_AS_TLS */
28601 #endif /* TARGET_XCOFF */
/* Emit .weak for NAME; for AIX-ABI functions with dot-symbols, also weaken
   the '.'-prefixed entry symbol and mirror any .set definition.  */
28604 rs6000_asm_weaken_decl (FILE *stream, tree decl,
28605 const char *name, const char *val)
28607 fputs ("\t.weak\t", stream);
28608 RS6000_OUTPUT_BASENAME (stream, name);
28609 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28610 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28613 fputs ("[DS]", stream);
28614 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28616 fputs (rs6000_xcoff_visibility (decl), stream);
28618 fputs ("\n\t.weak\t.", stream);
28619 RS6000_OUTPUT_BASENAME (stream, name);
28621 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28623 fputs (rs6000_xcoff_visibility (decl), stream);
28625 fputc ('\n', stream);
28628 #ifdef ASM_OUTPUT_DEF
28629 ASM_OUTPUT_DEF (stream, name, val);
28631 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28632 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28634 fputs ("\t.set\t.", stream);
28635 RS6000_OUTPUT_BASENAME (stream, name);
28636 fputs (",.", stream);
28637 RS6000_OUTPUT_BASENAME (stream, val);
28638 fputc ('\n', stream);
/* NOTE(review): non-contiguous extract — many interior lines (case
   labels, braces, returns) are missing from rs6000_rtx_costs below;
   kept byte-identical.  */
28644 /* Return true if INSN should not be copied. */
28647 rs6000_cannot_copy_insn_p (rtx_insn *insn)
28649 return recog_memoized (insn) >= 0
28650 && get_attr_cannot_copy (insn);
28653 /* Compute a (partial) cost for rtx X. Return true if the complete
28654 cost has been computed, and false if subexpressions should be
28655 scanned. In either case, *TOTAL contains the cost result. */
28658 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
28659 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
28661 int code = GET_CODE (x);
28665 /* On the RS/6000, if it is valid in the insn, it is free. */
28667 if (((outer_code == SET
28668 || outer_code == PLUS
28669 || outer_code == MINUS)
28670 && (satisfies_constraint_I (x)
28671 || satisfies_constraint_L (x)))
28672 || (outer_code == AND
28673 && (satisfies_constraint_K (x)
28675 ? satisfies_constraint_L (x)
28676 : satisfies_constraint_J (x))))
28677 || ((outer_code == IOR || outer_code == XOR)
28678 && (satisfies_constraint_K (x)
28680 ? satisfies_constraint_L (x)
28681 : satisfies_constraint_J (x))))
28682 || outer_code == ASHIFT
28683 || outer_code == ASHIFTRT
28684 || outer_code == LSHIFTRT
28685 || outer_code == ROTATE
28686 || outer_code == ROTATERT
28687 || outer_code == ZERO_EXTRACT
28688 || (outer_code == MULT
28689 && satisfies_constraint_I (x))
28690 || ((outer_code == DIV || outer_code == UDIV
28691 || outer_code == MOD || outer_code == UMOD)
28692 && exact_log2 (INTVAL (x)) >= 0)
28693 || (outer_code == COMPARE
28694 && (satisfies_constraint_I (x)
28695 || satisfies_constraint_K (x)))
28696 || ((outer_code == EQ || outer_code == NE)
28697 && (satisfies_constraint_I (x)
28698 || satisfies_constraint_K (x)
28700 ? satisfies_constraint_L (x)
28701 : satisfies_constraint_J (x))))
28702 || (outer_code == GTU
28703 && satisfies_constraint_I (x))
28704 || (outer_code == LTU
28705 && satisfies_constraint_P (x)))
/* Constants needing one extra insn to materialize.  */
28710 else if ((outer_code == PLUS
28711 && reg_or_add_cint_operand (x, VOIDmode))
28712 || (outer_code == MINUS
28713 && reg_or_sub_cint_operand (x, VOIDmode))
28714 || ((outer_code == SET
28715 || outer_code == IOR
28716 || outer_code == XOR)
28718 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
28720 *total = COSTS_N_INSNS (1);
28726 case CONST_WIDE_INT:
28730 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28734 /* When optimizing for size, MEM should be slightly more expensive
28735 than generating address, e.g., (plus (reg) (const)).
28736 L1 cache latency is about two instructions. */
28737 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28738 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
28739 *total += COSTS_N_INSNS (100);
28748 if (FLOAT_MODE_P (mode))
28749 *total = rs6000_cost->fp;
28751 *total = COSTS_N_INSNS (1);
/* MULT: cost depends on constant-ness of operand 1 and the mode.  */
28755 if (CONST_INT_P (XEXP (x, 1))
28756 && satisfies_constraint_I (XEXP (x, 1)))
28758 if (INTVAL (XEXP (x, 1)) >= -256
28759 && INTVAL (XEXP (x, 1)) <= 255)
28760 *total = rs6000_cost->mulsi_const9;
28762 *total = rs6000_cost->mulsi_const;
28764 else if (mode == SFmode)
28765 *total = rs6000_cost->fp;
28766 else if (FLOAT_MODE_P (mode))
28767 *total = rs6000_cost->dmul;
28768 else if (mode == DImode)
28769 *total = rs6000_cost->muldi;
28771 *total = rs6000_cost->mulsi;
28775 if (mode == SFmode)
28776 *total = rs6000_cost->fp;
28778 *total = rs6000_cost->dmul;
28783 if (FLOAT_MODE_P (mode))
28785 *total = mode == DFmode ? rs6000_cost->ddiv
28786 : rs6000_cost->sdiv;
/* Integer div/mod by a power of two is a shift (plus fixup for MOD).  */
28793 if (CONST_INT_P (XEXP (x, 1))
28794 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
28796 if (code == DIV || code == MOD)
28798 *total = COSTS_N_INSNS (2);
28801 *total = COSTS_N_INSNS (1);
28805 if (GET_MODE (XEXP (x, 1)) == DImode)
28806 *total = rs6000_cost->divdi;
28808 *total = rs6000_cost->divsi;
28810 /* Add in shift and subtract for MOD unless we have a mod instruction. */
28811 if (!TARGET_MODULO && (code == MOD || code == UMOD))
28812 *total += COSTS_N_INSNS (2);
28816 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
28820 *total = COSTS_N_INSNS (4);
28824 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
28828 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
28832 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
28835 *total = COSTS_N_INSNS (1);
/* AND with a constant: recognize single-insn rotate-and-mask forms.  */
28839 if (CONST_INT_P (XEXP (x, 1)))
28841 rtx left = XEXP (x, 0);
28842 rtx_code left_code = GET_CODE (left);
28844 /* rotate-and-mask: 1 insn. */
28845 if ((left_code == ROTATE
28846 || left_code == ASHIFT
28847 || left_code == LSHIFTRT)
28848 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
28850 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
28851 if (!CONST_INT_P (XEXP (left, 1)))
28852 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
28853 *total += COSTS_N_INSNS (1);
28857 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
28858 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
28859 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
28860 || (val & 0xffff) == val
28861 || (val & 0xffff0000) == val
28862 || ((val & 0xffff) == 0 && mode == SImode))
28864 *total = rtx_cost (left, mode, AND, 0, speed);
28865 *total += COSTS_N_INSNS (1);
28870 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
28872 *total = rtx_cost (left, mode, AND, 0, speed);
28873 *total += COSTS_N_INSNS (2);
28878 *total = COSTS_N_INSNS (1);
28883 *total = COSTS_N_INSNS (1);
28889 *total = COSTS_N_INSNS (1);
28893 /* The EXTSWSLI instruction is a combined instruction. Don't count both
28894 the sign extend and shift separately within the insn. */
28895 if (TARGET_EXTSWSLI && mode == DImode
28896 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
28897 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
28908 /* Handle mul_highpart. */
28909 if (outer_code == TRUNCATE
28910 && GET_CODE (XEXP (x, 0)) == MULT)
28912 if (mode == DImode)
28913 *total = rs6000_cost->muldi;
28915 *total = rs6000_cost->mulsi;
28918 else if (outer_code == AND)
28921 *total = COSTS_N_INSNS (1);
28926 if (MEM_P (XEXP (x, 0)))
28929 *total = COSTS_N_INSNS (1);
28935 if (!FLOAT_MODE_P (mode))
28937 *total = COSTS_N_INSNS (1);
28943 case UNSIGNED_FLOAT:
28946 case FLOAT_TRUNCATE:
28947 *total = rs6000_cost->fp;
28951 if (mode == DFmode)
28952 *total = rs6000_cost->sfdf_convert;
28954 *total = rs6000_cost->fp;
28958 switch (XINT (x, 1))
28961 *total = rs6000_cost->fp;
28973 *total = COSTS_N_INSNS (1);
28976 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
28978 *total = rs6000_cost->fp;
28987 /* Carry bit requires mode == Pmode.
28988 NEG or PLUS already counted so only add one. */
28990 && (outer_code == NEG || outer_code == PLUS))
28992 *total = COSTS_N_INSNS (1);
29000 if (outer_code == SET)
29002 if (XEXP (x, 1) == const0_rtx)
29004 *total = COSTS_N_INSNS (2);
29009 *total = COSTS_N_INSNS (3);
29014 if (outer_code == COMPARE)
/* NOTE(review): non-contiguous extract — interior lines (case labels,
   braces) are missing; kept byte-identical.  */
29028 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
29031 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
29032 int opno, int *total, bool speed)
29034 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
29037 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
29038 "opno = %d, total = %d, speed = %s, x:\n",
29039 ret ? "complete" : "scan inner",
29040 GET_MODE_NAME (mode),
29041 GET_RTX_NAME (outer_code),
29044 speed ? "true" : "false");
/* Implement TARGET_INSN_COST: per-insn cost from the cost attribute when
   present, otherwise derived from length and type-specific table costs.  */
29052 rs6000_insn_cost (rtx_insn *insn, bool speed)
29054 if (recog_memoized (insn) < 0)
29058 return get_attr_length (insn);
29060 int cost = get_attr_cost (insn);
/* No explicit cost attribute: n = number of 4-byte machine insns.  */
29064 int n = get_attr_length (insn) / 4;
29065 enum attr_type type = get_attr_type (insn);
29072 cost = COSTS_N_INSNS (n + 1);
29076 switch (get_attr_size (insn))
29079 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
29082 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
29085 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
29088 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
29091 gcc_unreachable ();
29095 switch (get_attr_size (insn))
29098 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
29101 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
29104 gcc_unreachable ();
29109 cost = n * rs6000_cost->fp;
29112 cost = n * rs6000_cost->dmul;
29115 cost = n * rs6000_cost->sdiv;
29118 cost = n * rs6000_cost->ddiv;
29125 cost = COSTS_N_INSNS (n + 2);
29129 cost = COSTS_N_INSNS (n);
29135 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
29138 rs6000_debug_address_cost (rtx x, machine_mode mode,
29139 addr_space_t as, bool speed)
29141 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
29143 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
29144 ret, speed ? "true" : "false");
29151 /* A C expression returning the cost of moving data from a register of class
29152 CLASS1 to one of CLASS2. */
29155 rs6000_register_move_cost (machine_mode mode,
29156 reg_class_t from, reg_class_t to)
29159 reg_class_t rclass;
29161 if (TARGET_DEBUG_COST)
29164 /* If we have VSX, we can easily move between FPR or Altivec registers,
29165 otherwise we can only easily move within classes.
29166 Do this first so we give best-case answers for union classes
29167 containing both gprs and vsx regs. */
29168 HARD_REG_SET to_vsx, from_vsx;
29169 COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
29170 AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
29171 COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
29172 AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
29173 if (!hard_reg_set_empty_p (to_vsx)
29174 && !hard_reg_set_empty_p (from_vsx)
29176 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
29178 int reg = FIRST_FPR_REGNO;
29180 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
29181 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
29182 reg = FIRST_ALTIVEC_REGNO;
29183 ret = 2 * hard_regno_nregs (reg, mode);
29186 /* Moves from/to GENERAL_REGS. */
29187 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
29188 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
29190 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
29192 if (TARGET_DIRECT_MOVE)
29194 /* Keep the cost for direct moves above that for within
29195 a register class even if the actual processor cost is
29196 comparable. We do this because a direct move insn
29197 can't be a nop, whereas with ideal register
29198 allocation a move within the same class might turn
29199 out to be a nop. */
29200 if (rs6000_tune == PROCESSOR_POWER9
29201 || rs6000_tune == PROCESSOR_FUTURE)
29202 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29204 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29205 /* SFmode requires a conversion when moving between gprs
29207 if (mode == SFmode)
29211 ret = (rs6000_memory_move_cost (mode, rclass, false)
29212 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
29215 /* It's more expensive to move CR_REGS than CR0_REGS because of the
29217 else if (rclass == CR_REGS)
29220 /* For those processors that have slow LR/CTR moves, make them more
29221 expensive than memory in order to bias spills to memory .*/
29222 else if ((rs6000_tune == PROCESSOR_POWER6
29223 || rs6000_tune == PROCESSOR_POWER7
29224 || rs6000_tune == PROCESSOR_POWER8
29225 || rs6000_tune == PROCESSOR_POWER9)
29226 && reg_class_subset_p (rclass, SPECIAL_REGS))
29227 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29230 /* A move will cost one instruction per GPR moved. */
29231 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29234 /* Everything else has to go through GENERAL_REGS. */
29236 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
29237 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
29239 if (TARGET_DEBUG_COST)
29241 if (dbg_cost_ctrl == 1)
29243 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
29244 ret, GET_MODE_NAME (mode), reg_class_names[from],
29245 reg_class_names[to]);
29252 /* A C expressions returning the cost of moving data of MODE from a register to
   or from memory.  Implements the TARGET_MEMORY_MOVE_COST hook; the IN
   argument is ignored (same cost assumed for loads and stores).  */
29256 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
29257 bool in ATTRIBUTE_UNUSED)
29261 if (TARGET_DEBUG_COST)
   /* GPR load/store: 4 units per hard register covered by MODE
      (regno 0 is the first GPR).  */
29264 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
29265 ret = 4 * hard_regno_nregs (0, mode);
   /* FPR/VSX load/store: same per-register cost (regno 32 is the
      first FPR).  */
29266 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
29267 || reg_classes_intersect_p (rclass, VSX_REGS)))
29268 ret = 4 * hard_regno_nregs (32, mode);
29269 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
29270 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
   /* Other classes must be spilled through a GPR first, so add the
      register-move cost on top of a GPR memory access.  */
29272 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
29274 if (TARGET_DEBUG_COST)
   /* dbg_cost_ctrl guards against recursive debug output from the
      nested rs6000_register_move_cost call above.  */
29276 if (dbg_cost_ctrl == 1)
29278 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
29279 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
29286 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
29288 The register allocator chooses GEN_OR_VSX_REGS for the allocno
29289 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
29290 cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
29291 move cost between GENERAL_REGS and VSX_REGS low.
29293 It might seem reasonable to use a union class.  After all, if usage
29294 of vsr is low and gpr high, it might make sense to spill gpr to vsr
29295 rather than memory.  However, in cases where register pressure of
29296 both is high, like the cactus_adm spec test, allowing
29297 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
29298 the first scheduling pass.  This is partly due to an allocno of
29299 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
29300 class, which gives too high a pressure for GENERAL_REGS and too low
29301 for VSX_REGS.  So, force a choice of the subclass here.
29303 The best class is also the union if GENERAL_REGS and VSX_REGS have
29304 the same cost.  In that case we do use GEN_OR_VSX_REGS as the
29305 allocno class, since trying to narrow down the class by regno mode
29306 is prone to error.  For example, SImode is allowed in VSX regs and
29307 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
29308 it would be wrong to choose an allocno of GENERAL_REGS based on
29312 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
29313 reg_class_t allocno_class,
29314 reg_class_t best_class)
29316 switch (allocno_class)
29318 case GEN_OR_VSX_REGS:
29319 /* best_class must be a subset of allocno_class.  */
29320 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
29321 || best_class == GEN_OR_FLOAT_REGS
29322 || best_class == VSX_REGS
29323 || best_class == ALTIVEC_REGS
29324 || best_class == FLOAT_REGS
29325 || best_class == GENERAL_REGS
29326 || best_class == BASE_REGS);
29327 /* Use best_class but choose wider classes when copying from the
29328 wider class to best_class is cheap.  This mimics IRA choice
29329 of allocno class.  */
29330 if (best_class == BASE_REGS)
29331 return GENERAL_REGS;
   /* Widen FLOAT_REGS/ALTIVEC_REGS to the full VSX class; NOTE(review):
      the first half of this condition (presumably a TARGET_VSX test) is
      not visible in this listing — confirm against the full source.  */
29333 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
   /* For any other allocno class, keep IRA's choice unchanged.  */
29341 return allocno_class;
29344 /* Returns a code for a target-specific builtin that implements
29345 reciprocal of the function, or NULL_TREE if not available.
   Implements the TARGET_BUILTIN_RECIPROCAL hook: maps the vector
   sqrt builtins to their reciprocal-sqrt estimate counterparts when
   the -mrecip machinery allows the transformation for that mode.  */
29348 rs6000_builtin_reciprocal (tree fndecl)
29350 switch (DECL_FUNCTION_CODE (fndecl))
29352 case VSX_BUILTIN_XVSQRTDP:
   /* Only substitute rsqrte-based code when auto-generation of
      reciprocal square root estimates is enabled for V2DF.  */
29353 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
29356 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
29358 case VSX_BUILTIN_XVSQRTSP:
29359 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
29362 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
29369 /* Load up a constant.  If the mode is a vector mode, splat the value across
29370 all of the vector elements.
   Returns a register holding the (possibly splatted) constant DCONST.
   Supported modes: SFmode, DFmode, V4SFmode, V2DFmode; any other mode
   is a caller bug (gcc_unreachable).  */
29373 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
29377 if (mode == SFmode || mode == DFmode)
   /* Scalar: just force the CONST_DOUBLE into a register.  */
29379 rtx d = const_double_from_real_value (dconst, mode);
29380 reg = force_reg (mode, d);
29382 else if (mode == V4SFmode)
   /* V4SF: build a 4-element PARALLEL of the same SFmode constant and
      let the vector-init expander produce the splat.  */
29384 rtx d = const_double_from_real_value (dconst, SFmode);
29385 rtvec v = gen_rtvec (4, d, d, d, d);
29386 reg = gen_reg_rtx (mode);
29387 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29389 else if (mode == V2DFmode)
29391 rtx d = const_double_from_real_value (dconst, DFmode);
29392 rtvec v = gen_rtvec (2, d, d);
29393 reg = gen_reg_rtx (mode);
29394 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29397 gcc_unreachable ();
29402 /* Generate an FMA instruction.
   Emits TARGET = M1 * M2 + A via the fma optab; if expansion lands in
   a different register, a move into TARGET is emitted afterwards.  */
29405 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
29407 machine_mode mode = GET_MODE (target);
29410 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
   /* FMA must be available for every mode this file uses it with.  */
29411 gcc_assert (dst != NULL);
29414 emit_move_insn (target, dst);
29417 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a),
   i.e. DST = A - M1 * M2 computed with a single rounding.  */
29420 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
29422 machine_mode mode = GET_MODE (dst);
29425 /* This is a tad more complicated, since the fnma_optab is for
29426 a different expression: fma(-m1, m2, a), which is the same
29427 thing except in the case of signed zeros.
29429 Fortunately we know that if FMA is supported that FNMSUB is
29430 also supported in the ISA.  Just expand it directly.  */
29432 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
   /* Build the RTL for -fma(m1, m2, -a) explicitly and emit it as a
      single SET; the machine description matches this as fnmsub.  */
29434 r = gen_rtx_NEG (mode, a);
29435 r = gen_rtx_FMA (mode, m1, m2, r);
29436 r = gen_rtx_NEG (mode, r);
29437 emit_insn (gen_rtx_SET (dst, r));
29440 /* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
29441 add a reg_note saying that this was a division.  Support both scalar and
29442 vector divide.  Assumes no trapping math and finite arguments.  */
29445 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
29447 machine_mode mode = GET_MODE (dst);
29448 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
29451 /* Low precision estimates guarantee 5 bits of accuracy.  High
29452 precision estimates guarantee 14 bits of accuracy.  SFmode
29453 requires 23 bits of accuracy.  DFmode requires 52 bits of
29454 accuracy.  Each pass at least doubles the accuracy, leading
29455 to the following.  */
29456 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
   /* Double-precision modes need one extra refinement pass; NOTE(review):
      the increment statement itself is not visible in this listing.  */
29457 if (mode == DFmode || mode == V2DFmode)
29460 enum insn_code code = optab_handler (smul_optab, mode);
29461 insn_gen_fn gen_mul = GEN_FCN (code);
29463 gcc_assert (code != CODE_FOR_nothing);
29465 one = rs6000_load_constant_and_splat (mode, dconst1);
29467 /* x0 = 1./d estimate */
29468 x0 = gen_reg_rtx (mode);
29469 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
29472 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
29475 /* e0 = 1. - d * x0 */
29476 e0 = gen_reg_rtx (mode);
29477 rs6000_emit_nmsub (e0, d, x0, one);
29479 /* x1 = x0 + e0 * x0 */
29480 x1 = gen_reg_rtx (mode);
29481 rs6000_emit_madd (x1, e0, x0, x0);
29483 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
29484 ++i, xprev = xnext, eprev = enext) {
29486 /* enext = eprev * eprev */
29487 enext = gen_reg_rtx (mode);
29488 emit_insn (gen_mul (enext, eprev, eprev));
29490 /* xnext = xprev + enext * xprev */
29491 xnext = gen_reg_rtx (mode);
29492 rs6000_emit_madd (xnext, enext, xprev, xprev);
29498 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */
29500 /* u = n * xprev */
29501 u = gen_reg_rtx (mode);
29502 emit_insn (gen_mul (u, n, xprev));
29504 /* v = n - (d * u) */
29505 v = gen_reg_rtx (mode);
29506 rs6000_emit_nmsub (v, d, u, n);
29508 /* dst = (v * xprev) + u */
29509 rs6000_emit_madd (dst, v, xprev, u);
   /* Record that this insn sequence computes n/d so later passes can
      treat it as a division (CSE, notes for debugging).  */
29512 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
29515 /* Goldschmidt's Algorithm for single/double-precision floating point
29516 sqrt and rsqrt.  Assumes no trapping math and finite arguments.
   DST receives sqrt(SRC) when RECIP is false, 1/sqrt(SRC) when true.  */
29519 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
29521 machine_mode mode = GET_MODE (src);
29522 rtx e = gen_reg_rtx (mode);
29523 rtx g = gen_reg_rtx (mode);
29524 rtx h = gen_reg_rtx (mode);
29526 /* Low precision estimates guarantee 5 bits of accuracy.  High
29527 precision estimates guarantee 14 bits of accuracy.  SFmode
29528 requires 23 bits of accuracy.  DFmode requires 52 bits of
29529 accuracy.  Each pass at least doubles the accuracy, leading
29530 to the following.  */
29531 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
   /* NOTE(review): the pass-count adjustment for double-precision modes
      is elided in this listing — confirm against the full source.  */
29532 if (mode == DFmode || mode == V2DFmode)
29537 enum insn_code code = optab_handler (smul_optab, mode);
29538 insn_gen_fn gen_mul = GEN_FCN (code);
29540 gcc_assert (code != CODE_FOR_nothing);
29542 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
29544 /* e = rsqrt estimate */
29545 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
29548 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
29551 rtx zero = force_reg (mode, CONST0_RTX (mode));
29553 if (mode == SFmode)
   /* Scalar: conditional move e = (src > 0) ? e : zero.  */
29555 rtx target = emit_conditional_move (e, GT, src, zero, mode,
29558 emit_move_insn (e, target);
   /* Vector: per-element select via vector compare + cond_expr.  */
29562 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
29563 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
29567 /* g = sqrt estimate.  */
29568 emit_insn (gen_mul (g, e, src));
29569 /* h = 1/(2*sqrt) estimate.  */
29570 emit_insn (gen_mul (h, e, mhalf));
   /* Single-pass (high-precision estimate) refinement for rsqrt.  */
29576 rtx t = gen_reg_rtx (mode);
29577 rs6000_emit_nmsub (t, g, h, mhalf);
29578 /* Apply correction directly to 1/rsqrt estimate.  */
29579 rs6000_emit_madd (dst, e, t, e);
   /* General case: iterate Goldschmidt refinement of g (sqrt) and
      h (1/(2*sqrt)) in lockstep.  */
29583 for (i = 0; i < passes; i++)
29585 rtx t1 = gen_reg_rtx (mode);
29586 rtx g1 = gen_reg_rtx (mode);
29587 rtx h1 = gen_reg_rtx (mode);
29589 rs6000_emit_nmsub (t1, g, h, mhalf);
29590 rs6000_emit_madd (g1, g, t1, g);
29591 rs6000_emit_madd (h1, h, t1, h);
29596 /* Multiply by 2 for 1/rsqrt.  */
29597 emit_insn (gen_add3_insn (dst, h, h));
   /* Final correction step for the sqrt (non-recip) result.  */
29602 rtx t = gen_reg_rtx (mode);
29603 rs6000_emit_nmsub (t, g, h, mhalf);
29604 rs6000_emit_madd (dst, g, t, g);
29610 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
29611 (Power7) targets.  DST is the target, and SRC is the argument operand.  */
29614 rs6000_emit_popcount (rtx dst, rtx src)
29616 machine_mode mode = GET_MODE (dst);
29619 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
29620 if (TARGET_POPCNTD)
29622 if (mode == SImode)
29623 emit_insn (gen_popcntdsi2 (dst, src));
29625 emit_insn (gen_popcntddi2 (dst, src));
   /* Fallback: popcntb gives a per-byte population count; sum the
      bytes with a multiply by 0x01010101... and shift the total down
      from the top byte.  */
29629 tmp1 = gen_reg_rtx (mode);
29631 if (mode == SImode)
29633 emit_insn (gen_popcntbsi2 (tmp1, src));
29634 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
29636 tmp2 = force_reg (SImode, tmp2);
29637 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
29641 emit_insn (gen_popcntbdi2 (tmp1, src));
29642 tmp2 = expand_mult (DImode, tmp1,
29643 GEN_INT ((HOST_WIDE_INT)
29644 0x01010101 << 32 | 0x01010101),
29646 tmp2 = force_reg (DImode, tmp2);
29647 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
29652 /* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
29653 target, and SRC is the argument operand.  */
29656 rs6000_emit_parity (rtx dst, rtx src)
29658 machine_mode mode = GET_MODE (dst);
29661 tmp = gen_reg_rtx (mode);
29663 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
29666 if (mode == SImode)
29668 emit_insn (gen_popcntbsi2 (tmp, src));
29669 emit_insn (gen_paritysi2_cmpb (dst, tmp));
29673 emit_insn (gen_popcntbdi2 (tmp, src));
29674 emit_insn (gen_paritydi2_cmpb (dst, tmp));
   /* No prty insn available: choose between a multiply-based reduction
      and a shift/xor tree depending on the processor's multiply cost.  */
29679 if (mode == SImode)
29681 /* Is mult+shift >= shift+xor+shift+xor?  */
29682 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
   /* Shift/xor tree: fold the per-byte counts down to the low byte,
      then mask the parity bit.  */
29684 rtx tmp1, tmp2, tmp3, tmp4;
29686 tmp1 = gen_reg_rtx (SImode);
29687 emit_insn (gen_popcntbsi2 (tmp1, src));
29689 tmp2 = gen_reg_rtx (SImode);
29690 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
29691 tmp3 = gen_reg_rtx (SImode);
29692 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
29694 tmp4 = gen_reg_rtx (SImode);
29695 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
29696 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
   /* Cheap multiply: compute the full popcount, parity is its low bit.  */
29699 rs6000_emit_popcount (tmp, src);
29700 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
29704 /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
29705 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
29707 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
29709 tmp1 = gen_reg_rtx (DImode);
29710 emit_insn (gen_popcntbdi2 (tmp1, src));
29712 tmp2 = gen_reg_rtx (DImode);
29713 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
29714 tmp3 = gen_reg_rtx (DImode);
29715 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
29717 tmp4 = gen_reg_rtx (DImode);
29718 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
29719 tmp5 = gen_reg_rtx (DImode);
29720 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
29722 tmp6 = gen_reg_rtx (DImode);
29723 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
29724 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
29727 rs6000_emit_popcount (tmp, src);
29728 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
29732 /* Expand an Altivec constant permutation for little endian mode.
29733 OP0 and OP1 are the input vectors and TARGET is the output vector.
29734 SEL specifies the constant permutation vector.
29736 There are two issues: First, the two input operands must be
29737 swapped so that together they form a double-wide array in LE
29738 order.  Second, the vperm instruction has surprising behavior
29739 in LE mode: it interprets the elements of the source vectors
29740 in BE mode ("left to right") and interprets the elements of
29741 the destination vector in LE mode ("right to left").  To
29742 correct for this, we must subtract each element of the permute
29743 control vector from 31.
29745 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
29746 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
29747 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
29748 serve as the permute control vector.  Then, in BE mode,
29752 places the desired result in vr9.  However, in LE mode the
29753 vector contents will be
29755 vr10 = 00000003 00000002 00000001 00000000
29756 vr11 = 00000007 00000006 00000005 00000004
29758 The result of the vperm using the same permute control vector is
29760 vr9 = 05000000 07000000 01000000 03000000
29762 That is, the leftmost 4 bytes of vr10 are interpreted as the
29763 source for the rightmost 4 bytes of vr9, and so on.
29765 If we change the permute control vector to
29767 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
29775 vr9 = 00000006 00000004 00000002 00000000.  */
29778 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
29779 const vec_perm_indices &sel)
29783 rtx constv, unspec;
29785 /* Unpack and adjust the constant selector.  */
29786 for (i = 0; i < 16; ++i)
   /* 31 - elt implements the LE index correction described above;
      & 31 keeps out-of-range selector values well-defined.  */
29788 unsigned int elt = 31 - (sel[i] & 31);
29789 perm[i] = GEN_INT (elt);
29792 /* Expand to a permute, swapping the inputs and using the
29793 adjusted selector.  */
29795 op0 = force_reg (V16QImode, op0);
29797 op1 = force_reg (V16QImode, op1);
29799 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
29800 constv = force_reg (V16QImode, constv);
   /* Note op1/op0 order: operands are swapped for LE (issue one above).  */
29801 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
   /* The vperm pattern requires a REG destination; go through a temp
      when TARGET is not a register.  */
29803 if (!REG_P (target))
29805 rtx tmp = gen_reg_rtx (V16QImode);
29806 emit_move_insn (tmp, unspec);
29810 emit_move_insn (target, unspec);
29813 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
29814 permute control vector.  But here it's not a constant, so we must
29815 generate a vector NAND or NOR to do the adjustment.
   OPERANDS: 0 = target, 1 = op0, 2 = op1, 3 = non-constant selector.  */
29818 altivec_expand_vec_perm_le (rtx operands[4])
29820 rtx notx, iorx, unspec;
29821 rtx target = operands[0];
29822 rtx op0 = operands[1];
29823 rtx op1 = operands[2];
29824 rtx sel = operands[3];
29826 rtx norreg = gen_reg_rtx (V16QImode);
29827 machine_mode mode = GET_MODE (target);
29829 /* Get everything in regs so the pattern matches.  */
29831 op0 = force_reg (mode, op0);
29833 op1 = force_reg (mode, op1);
29835 sel = force_reg (V16QImode, sel);
29836 if (!REG_P (target))
29837 tmp = gen_reg_rtx (mode);
   /* ISA 3.0 (Power9) has vpermr which does the LE adjustment itself.  */
29839 if (TARGET_P9_VECTOR)
29841 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
29846 /* Invert the selector with a VNAND if available, else a VNOR.
29847 The VNAND is preferred for future fusion opportunities.  */
29848 notx = gen_rtx_NOT (V16QImode, sel);
29849 iorx = (TARGET_P8_VECTOR
29850 ? gen_rtx_IOR (V16QImode, notx, notx)
29851 : gen_rtx_AND (V16QImode, notx, notx));
29852 emit_insn (gen_rtx_SET (norreg, iorx));
29854 /* Permute with operands reversed and adjusted selector.  */
29855 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
29859 /* Copy into target, possibly by way of a register.  */
29860 if (!REG_P (target))
29862 emit_move_insn (tmp, unspec);
29866 emit_move_insn (target, unspec);
29869 /* Expand an Altivec constant permutation.  Return true if we match
29870 an efficient implementation; false to fall back to VPERM.
29872 OP0 and OP1 are the input vectors and TARGET is the output vector.
29873 SEL specifies the constant permutation vector.  */
29876 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
29877 const vec_perm_indices &sel)
   /* Table of single-instruction permutations: each entry pairs the ISA
      flag that enables the insn with the 16-byte selector it implements.  */
29879 struct altivec_perm_insn {
29880 HOST_WIDE_INT mask;
29881 enum insn_code impl;
29882 unsigned char perm[16];
29884 static const struct altivec_perm_insn patterns[] = {
29885 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
29886 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
29887 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
29888 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
29889 { OPTION_MASK_ALTIVEC,
29890 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
29891 : CODE_FOR_altivec_vmrglb_direct),
29892 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
29893 { OPTION_MASK_ALTIVEC,
29894 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
29895 : CODE_FOR_altivec_vmrglh_direct),
29896 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
29897 { OPTION_MASK_ALTIVEC,
29898 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
29899 : CODE_FOR_altivec_vmrglw_direct),
29900 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
29901 { OPTION_MASK_ALTIVEC,
29902 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
29903 : CODE_FOR_altivec_vmrghb_direct),
29904 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
29905 { OPTION_MASK_ALTIVEC,
29906 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
29907 : CODE_FOR_altivec_vmrghh_direct),
29908 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
29909 { OPTION_MASK_ALTIVEC,
29910 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
29911 : CODE_FOR_altivec_vmrghw_direct),
29912 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
29913 { OPTION_MASK_P8_VECTOR,
29914 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
29915 : CODE_FOR_p8_vmrgow_v4sf_direct),
29916 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
29917 { OPTION_MASK_P8_VECTOR,
29918 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
29919 : CODE_FOR_p8_vmrgew_v4sf_direct),
29920 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
29923 unsigned int i, j, elt, which;
29924 unsigned char perm[16];
29928 /* Unpack the constant selector.  */
29929 for (i = which = 0; i < 16; ++i)
   /* WHICH accumulates bit 1 if any element selects from op0 and
      bit 2 if any selects from op1.  */
29932 which |= (elt < 16 ? 1 : 2);
29936 /* Simplify the constant selector based on operands.  */
29940 gcc_unreachable ();
29944 if (!rtx_equal_p (op0, op1))
29949 for (i = 0; i < 16; ++i)
29961 /* Look for splat patterns.  */
29966 for (i = 0; i < 16; ++i)
29967 if (perm[i] != elt)
   /* All 16 bytes identical: a byte splat (vspltb).  */
29971 if (!BYTES_BIG_ENDIAN)
29973 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
   /* Pairs (elt, elt+1) repeated: a halfword splat (vsplth).  */
29979 for (i = 0; i < 16; i += 2)
29980 if (perm[i] != elt || perm[i + 1] != elt + 1)
29984 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
29985 x = gen_reg_rtx (V8HImode);
29986 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
29988 emit_move_insn (target, gen_lowpart (V16QImode, x));
   /* Quads (elt..elt+3) repeated: a word splat (vspltw).  */
29995 for (i = 0; i < 16; i += 4)
29997 || perm[i + 1] != elt + 1
29998 || perm[i + 2] != elt + 2
29999 || perm[i + 3] != elt + 3)
30003 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
30004 x = gen_reg_rtx (V4SImode);
30005 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
30007 emit_move_insn (target, gen_lowpart (V16QImode, x));
30013 /* Look for merge and pack patterns.  */
30014 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
   /* Skip insns not enabled by the current ISA flags.  */
30018 if ((patterns[j].mask & rs6000_isa_flags) == 0)
30021 elt = patterns[j].perm[0];
30022 if (perm[0] == elt)
30024 else if (perm[0] == elt + 16)
   /* Verify the remaining 15 selector bytes against the pattern,
      allowing for a swapped-operand or single-operand match.  */
30028 for (i = 1; i < 16; ++i)
30030 elt = patterns[j].perm[i];
30032 elt = (elt >= 16 ? elt - 16 : elt + 16);
30033 else if (one_vec && elt >= 16)
30035 if (perm[i] != elt)
30040 enum insn_code icode = patterns[j].impl;
30041 machine_mode omode = insn_data[icode].operand[0].mode;
30042 machine_mode imode = insn_data[icode].operand[1].mode;
30044 /* For little-endian, don't use vpkuwum and vpkuhum if the
30045 underlying vector type is not V4SI and V8HI, respectively.
30046 For example, using vpkuwum with a V8HI picks up the even
30047 halfwords (BE numbering) when the even halfwords (LE
30048 numbering) are what we need.  */
30049 if (!BYTES_BIG_ENDIAN
30050 && icode == CODE_FOR_altivec_vpkuwum_direct
30052 && GET_MODE (op0) != V4SImode)
30054 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
30056 if (!BYTES_BIG_ENDIAN
30057 && icode == CODE_FOR_altivec_vpkuhum_direct
30059 && GET_MODE (op0) != V8HImode)
30061 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
30064 /* For little-endian, the two input operands must be swapped
30065 (or swapped back) to ensure proper right-to-left numbering
30067 if (swapped ^ !BYTES_BIG_ENDIAN)
30068 std::swap (op0, op1);
   /* Bridge mode mismatches between V16QI and the insn's modes
      with lowpart subregs on inputs and output.  */
30069 if (imode != V16QImode)
30071 op0 = gen_lowpart (imode, op0);
30072 op1 = gen_lowpart (imode, op1);
30074 if (omode == V16QImode)
30077 x = gen_reg_rtx (omode);
30078 emit_insn (GEN_FCN (icode) (x, op0, op1));
30079 if (omode != V16QImode)
30080 emit_move_insn (target, gen_lowpart (V16QImode, x));
   /* No single-insn match: on LE fall back to the adjusted vperm.  */
30085 if (!BYTES_BIG_ENDIAN)
30087 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
30094 /* Expand a VSX Permute Doubleword constant permutation.
30095 Return true if we match an efficient implementation.
   PERM0/PERM1 are 2-bit selectors into the 4 doublewords of op0:op1.
   When TARGET is NULL this only tests whether the permutation is
   expressible (used by the vectorizer's testing mode).  */
30098 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
30099 unsigned char perm0, unsigned char perm1)
30103 /* If both selectors come from the same operand, fold to single op.  */
30104 if ((perm0 & 2) == (perm1 & 2))
30111 /* If both operands are equal, fold to simpler permutation.  */
30112 if (rtx_equal_p (op0, op1))
30115 perm1 = (perm1 & 1) + 2;
30117 /* If the first selector comes from the second operand, swap.  */
30118 else if (perm0 & 2)
30124 std::swap (op0, op1);
30126 /* If the second selector does not come from the second operand, fail.  */
30127 else if ((perm1 & 2) == 0)
30131 if (target != NULL)
30133 machine_mode vmode, dmode;
30136 vmode = GET_MODE (target);
30137 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
   /* DMODE is the double-wide (4-element) mode of the concatenation.  */
30138 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
30139 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
30140 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
30141 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
30142 emit_insn (gen_rtx_SET (target, x));
30147 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
   With TARGET == NULL this is a query ("can this permutation be
   done?"); otherwise it emits the permutation into TARGET.  */
30150 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
30151 rtx op1, const vec_perm_indices &sel)
30153 bool testing_p = !target;
30155 /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
30156 if (TARGET_ALTIVEC && testing_p)
30159 /* Check for ps_merge* or xxpermdi insns.  */
30160 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
   /* In testing mode fabricate raw registers so the doubleword
      expander can be exercised without real operands.  */
30164 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
30165 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
30167 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
30171 if (TARGET_ALTIVEC)
30173 /* Force the target-independent code to lower to V16QImode.  */
30174 if (vmode != V16QImode)
30176 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
30183 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
30184 OP0 and OP1 are the input vectors and TARGET is the output vector.
30185 PERM specifies the constant permutation vector.  */
30188 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
30189 machine_mode vmode, const vec_perm_builder &perm)
   /* Let the generic expander pick the best sequence; copy the result
      into TARGET only if it landed somewhere else.  */
30191 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
30193 emit_move_insn (target, x);
30196 /* Expand an extract even operation.
   TARGET receives the even-indexed elements of the op0:op1 concat
   (selector 0, 2, 4, ...).  */
30199 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
30201 machine_mode vmode = GET_MODE (target);
30202 unsigned i, nelt = GET_MODE_NUNITS (vmode);
30203 vec_perm_builder perm (nelt, nelt, 1);
30205 for (i = 0; i < nelt; i++)
30206 perm.quick_push (i * 2);
30208 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30211 /* Expand a vector interleave operation.
   HIGHP selects the high halves of op0/op1 (when true) or the low
   halves; elements are alternated op0[i], op1[i] into TARGET.  */
30214 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
30216 machine_mode vmode = GET_MODE (target);
30217 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
30218 vec_perm_builder perm (nelt, nelt, 1);
   /* HIGH is the element offset of the chosen half: 0 for the high
      half (element 0 first), nelt/2 for the low half.  */
30220 high = (highp ? 0 : nelt / 2);
30221 for (i = 0; i < nelt / 2; i++)
30223 perm.quick_push (i + high);
30224 perm.quick_push (i + nelt + high);
30227 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30230 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT.
   Implemented as TGT = SRC * splat(2.0 ** SCALE).  */
30232 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
30234 HOST_WIDE_INT hwi_scale (scale);
30235 REAL_VALUE_TYPE r_pow;
30236 rtvec v = rtvec_alloc (2);
30238 rtx scale_vec = gen_reg_rtx (V2DFmode);
   /* Compute 2^scale exactly in the target's DFmode representation.  */
30239 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
30240 elt = const_double_from_real_value (r_pow, DFmode);
30241 RTVEC_ELT (v, 0) = elt;
30242 RTVEC_ELT (v, 1) = elt;
30243 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
30244 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
30247 /* Return an RTX representing where to find the function value of a
30248 function returning MODE.
   MODE is a complex mode; the result is either a single REG covering
   both parts or a two-entry PARALLEL of the real/imaginary parts.  */
30250 rs6000_complex_function_value (machine_mode mode)
30252 unsigned int regno;
30254 machine_mode inner = GET_MODE_INNER (mode);
30255 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
   /* IEEE 128-bit complex values return in vector registers.  */
30257 if (TARGET_FLOAT128_TYPE
30259 || (mode == TCmode && TARGET_IEEEQUAD)))
30260 regno = ALTIVEC_ARG_RETURN;
30262 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30263 regno = FP_ARG_RETURN;
30267 regno = GP_ARG_RETURN;
30269 /* 32-bit is OK since it'll go in r3/r4.  */
30270 if (TARGET_32BIT && inner_bytes >= 4)
30271 return gen_rtx_REG (mode, regno);
30274 if (inner_bytes >= 8)
30275 return gen_rtx_REG (mode, regno);
   /* Otherwise split real and imaginary parts across two consecutive
      registers via a PARALLEL with byte offsets.  */
30277 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
30279 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
30280 GEN_INT (inner_bytes));
30281 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
30284 /* Return an rtx describing a return value of MODE as a PARALLEL
30285 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
30286 stride REG_STRIDE.  */
30289 rs6000_parallel_return (machine_mode mode,
30290 int n_elts, machine_mode elt_mode,
30291 unsigned int regno, unsigned int reg_stride)
30293 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
30296 for (i = 0; i < n_elts; i++)
   /* Element i lives at byte offset i * size-of-element within the
      returned value.  */
30298 rtx r = gen_rtx_REG (elt_mode, regno);
30299 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
30300 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
30301 regno += reg_stride;
30307 /* Target hook for TARGET_FUNCTION_VALUE.
30309 An integer value is in r3 and a floating-point value is in fp1,
30310 unless -msoft-float.  */
30313 rs6000_function_value (const_tree valtype,
30314 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
30315 bool outgoing ATTRIBUTE_UNUSED)
30318 unsigned int regno;
30319 machine_mode elt_mode;
30322 /* Special handling for structs in darwin64.  */
30324 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
30326 CUMULATIVE_ARGS valcum;
30330 valcum.fregno = FP_ARG_MIN_REG;
30331 valcum.vregno = ALTIVEC_ARG_MIN_REG;
30332 /* Do a trial code generation as if this were going to be passed as
30333 an argument; if any part goes in memory, we return NULL.  */
30334 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
30337 /* Otherwise fall through to standard ABI rules.  */
30340 mode = TYPE_MODE (valtype);
30342 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
30343 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
30345 int first_reg, n_regs;
30347 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
30349 /* _Decimal128 must use even/odd register pairs.  */
30350 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
   /* Each element occupies ceil(size / 8) FP registers.  */
30351 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
30355 first_reg = ALTIVEC_ARG_RETURN;
30359 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
30362 /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
30363 if (TARGET_32BIT && TARGET_POWERPC64)
30372 int count = GET_MODE_SIZE (mode) / 4;
30373 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
   /* Promote small integers and pointers to full register width.  */
30376 if ((INTEGRAL_TYPE_P (valtype)
30377 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
30378 || POINTER_TYPE_P (valtype))
30379 mode = TARGET_32BIT ? SImode : DImode;
30381 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30382 /* _Decimal128 must use an even/odd register pair.  */
30383 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30384 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
30385 && !FLOAT128_VECTOR_P (mode))
30386 regno = FP_ARG_RETURN;
30387 else if (TREE_CODE (valtype) == COMPLEX_TYPE
30388 && targetm.calls.split_complex_arg)
30389 return rs6000_complex_function_value (mode);
30390 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
30391 return register is used in both cases, and we won't see V2DImode/V2DFmode
30392 for pure altivec, combine the two cases.  */
30393 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
30394 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
30395 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
30396 regno = ALTIVEC_ARG_RETURN;
30398 regno = GP_ARG_RETURN;
30400 return gen_rtx_REG (mode, regno);
30403 /* Define how to find the value returned by a library function
30404 assuming the value has mode MODE.
   Like rs6000_function_value, but works from MODE alone since
   libcalls carry no tree type information.  */
30406 rs6000_libcall_value (machine_mode mode)
30408 unsigned int regno;
30410 /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
30411 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
30412 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
30414 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30415 /* _Decimal128 must use an even/odd register pair.  */
30416 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30417 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
30418 regno = FP_ARG_RETURN;
30419 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
30420 return register is used in both cases, and we won't see V2DImode/V2DFmode
30421 for pure altivec, combine the two cases.  */
30422 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
30423 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
30424 regno = ALTIVEC_ARG_RETURN;
30425 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
30426 return rs6000_complex_function_value (mode);
30428 regno = GP_ARG_RETURN;
30430 return gen_rtx_REG (mode, regno);
30433 /* Compute register pressure classes.  We implement the target hook to avoid
30434 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
30435 lead to incorrect estimates of number of available registers and therefor
30436 increased register pressure/spill.
   Fills PRESSURE_CLASSES and returns the count (via n).  */
30438 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
30443 pressure_classes[n++] = GENERAL_REGS;
30445 pressure_classes[n++] = VSX_REGS;
   /* Without VSX, track Altivec and FP register files separately.  */
30448 if (TARGET_ALTIVEC)
30449 pressure_classes[n++] = ALTIVEC_REGS;
30450 if (TARGET_HARD_FLOAT)
30451 pressure_classes[n++] = FLOAT_REGS;
30453 pressure_classes[n++] = CR_REGS;
30454 pressure_classes[n++] = SPECIAL_REGS;
30459 /* Given FROM and TO register numbers, say whether this elimination is allowed.
30460 Frame pointer elimination is automatically handled.
30462 For the RS/6000, if frame pointer elimination is being done, we would like
30463 to convert ap into fp, not sp.
30465 We need r30 if -mminimal-toc was specified, and there are constant pool
/* NOTE(review): this extract elides lines (embedded numbering skips), so
   the end of the comment above, the return type, braces, and the trailing
   `: true` arm of the conditional expression are not visible here.  */
30469 rs6000_can_eliminate (const int from, const int to)
30471 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
30472 ? ! frame_pointer_needed
30473 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
30474 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
30475 || constant_pool_empty_p ()
30479 /* Define the offset between two registers, FROM to be eliminated and its
30480 replacement TO, at the start of a routine. */
/* NOTE(review): return-type line, braces, the missing `:` arm at original
   line 30498, the RS6000_PIC_OFFSET_TABLE_REGNUM case body, and the final
   `return offset;` are elided from this extract.  */
30482 rs6000_initial_elimination_offset (int from, int to)
30484 rs6000_stack_t *info = rs6000_stack_info ();
30485 HOST_WIDE_INT offset;
30487 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30488 offset = info->push_p ? 0 : -info->total_size;
30489 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30491 offset = info->push_p ? 0 : -info->total_size;
30492 if (FRAME_GROWS_DOWNWARD)
30493 offset += info->fixed_size + info->vars_size + info->parm_size;
30495 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30496 offset = FRAME_GROWS_DOWNWARD
30497 ? info->fixed_size + info->vars_size + info->parm_size
30499 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30500 offset = info->total_size;
30501 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30502 offset = info->push_p ? info->total_size : 0;
30503 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
30506 gcc_unreachable ();
30511 /* Fill in sizes of registers used by unwinder. */
/* NOTE(review): elided lines here include the return type, braces, the
   declaration of loop index `i`, and the body of the early-out for
   (TARGET_MACHO && !TARGET_ALTIVEC).  Writes a size of 16 bytes for each
   Altivec register column into the table at ADDRESS.  */
30514 rs6000_init_dwarf_reg_sizes_extra (tree address)
30516 if (TARGET_MACHO && ! TARGET_ALTIVEC)
30519 machine_mode mode = TYPE_MODE (char_type_node);
30520 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
30521 rtx mem = gen_rtx_MEM (BLKmode, addr);
30522 rtx value = gen_int_mode (16, mode);
30524 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
30525 The unwinder still needs to know the size of Altivec registers. */
30527 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
30529 int column = DWARF_REG_TO_UNWIND_COLUMN
30530 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
30531 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
30533 emit_move_insn (adjust_address (mem, mode, offset), value);
30538 /* Map internal gcc register numbers to debug format register numbers.
30539 FORMAT specifies the type of debug register number to use:
30540 0 -- debug information, except for frame-related sections
30541 1 -- DWARF .debug_frame section
30542 2 -- DWARF .eh_frame section */
/* NOTE(review): many return statements are elided in this extract (the
   embedded line numbers skip), e.g. the returned constants for LR, CTR,
   CR2, VRSAVE, VSCR, FRAME_POINTER_REGNUM and ARG_POINTER_REGNUM, plus
   the GPR fast path and the `#else`/`#endif` of the
   RS6000_USE_DWARF_NUMBERING conditional.  Confirm against full source.  */
30545 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
30547 /* On some platforms, we use the standard DWARF register
30548 numbering for .debug_info and .debug_frame. */
30549 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
30551 #ifdef RS6000_USE_DWARF_NUMBERING
30554 if (FP_REGNO_P (regno))
30555 return regno - FIRST_FPR_REGNO + 32;
30556 if (ALTIVEC_REGNO_P (regno))
30557 return regno - FIRST_ALTIVEC_REGNO + 1124;
30558 if (regno == LR_REGNO)
30560 if (regno == CTR_REGNO)
30562 if (regno == CA_REGNO)
30563 return 101; /* XER */
30564 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
30565 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
30566 The actual code emitted saves the whole of CR, so we map CR2_REGNO
30567 to the DWARF reg for CR. */
30568 if (format == 1 && regno == CR2_REGNO)
30570 if (CR_REGNO_P (regno))
30571 return regno - CR0_REGNO + 86;
30572 if (regno == VRSAVE_REGNO)
30574 if (regno == VSCR_REGNO)
30577 /* These do not make much sense. */
30578 if (regno == FRAME_POINTER_REGNUM)
30580 if (regno == ARG_POINTER_REGNUM)
30585 gcc_unreachable ();
30589 /* We use the GCC 7 (and before) internal number for non-DWARF debug
30590 information, and also for .eh_frame. */
30591 /* Translate the regnos to their numbers in GCC 7 (and before). */
30594 if (FP_REGNO_P (regno))
30595 return regno - FIRST_FPR_REGNO + 32;
30596 if (ALTIVEC_REGNO_P (regno))
30597 return regno - FIRST_ALTIVEC_REGNO + 77;
30598 if (regno == LR_REGNO)
30600 if (regno == CTR_REGNO)
30602 if (regno == CA_REGNO)
30603 return 76; /* XER */
30604 if (CR_REGNO_P (regno))
30605 return regno - CR0_REGNO + 68;
30606 if (regno == VRSAVE_REGNO)
30608 if (regno == VSCR_REGNO)
30611 if (regno == FRAME_POINTER_REGNUM)
30613 if (regno == ARG_POINTER_REGNUM)
30618 gcc_unreachable ();
30621 /* target hook eh_return_filter_mode */
/* Returns SImode for 32-bit targets, otherwise the word mode.  (Opening
   and closing braces are elided from this extract.)  */
30622 static scalar_int_mode
30623 rs6000_eh_return_filter_mode (void)
30625 return TARGET_32BIT ? SImode : word_mode;
30628 /* Target hook for translate_mode_attribute. */
/* Maps a 128-bit float mode spelled via attribute((mode(...))) to the
   long-double mode when that representation *is* long double.  The
   fall-through `return mode;` (original line 30637) and the braces are
   elided from this extract.  */
30629 static machine_mode
30630 rs6000_translate_mode_attribute (machine_mode mode)
30632 if ((FLOAT128_IEEE_P (mode)
30633 && ieee128_float_type_node == long_double_type_node)
30634 || (FLOAT128_IBM_P (mode)
30635 && ibm128_float_type_node == long_double_type_node))
30636 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
30640 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): return-type line, braces, and the `return false;`/
   `return true;` bodies of the first and third branches are elided from
   this extract.  */
30642 rs6000_scalar_mode_supported_p (scalar_mode mode)
30644 /* -m32 does not support TImode. This is the default, from
30645 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
30646 same ABI as for -m32. But default_scalar_mode_supported_p allows
30647 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
30648 for -mpowerpc64. */
30649 if (TARGET_32BIT && mode == TImode)
30652 if (DECIMAL_FLOAT_MODE_P (mode))
30653 return default_decimal_float_supported_p ();
30654 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
30657 return default_scalar_mode_supported_p (mode);
30660 /* Target hook for vector_mode_supported_p. */
/* NOTE(review): the return type, braces, the tail of the comment below,
   and the `return true;`/`return false;` bodies are elided here.  */
30662 rs6000_vector_mode_supported_p (machine_mode mode)
30664 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
30665 128-bit, the compiler might try to widen IEEE 128-bit to IBM
30667 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
30674 /* Target hook for floatn_mode. */
/* NOTE(review): large portions of this function are elided from this
   extract (roughly original lines 30677-30685 and 30697-30709), which
   presumably contained the switch over N for the `extended` and
   non-extended cases — confirm against the full source.  Both visible
   arms return TFmode/KFmode for the 128-bit IEEE type when
   -mfloat128-type is enabled.  */
30675 static opt_scalar_float_mode
30676 rs6000_floatn_mode (int n, bool extended)
30686 if (TARGET_FLOAT128_TYPE)
30687 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30689 return opt_scalar_float_mode ();
30692 return opt_scalar_float_mode ();
30695 /* Those are the only valid _FloatNx types. */
30696 gcc_unreachable ();
30710 if (TARGET_FLOAT128_TYPE)
30711 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30713 return opt_scalar_float_mode ();
30716 return opt_scalar_float_mode ();
30722 /* Target hook for c_mode_for_suffix. */
/* Maps the literal suffix 'q'/'Q' to the 128-bit float mode when
   -mfloat128-type is on.  The braces and the final fall-through return
   (presumably VOIDmode, original lines ~30737-30739) are elided.  */
30723 static machine_mode
30724 rs6000_c_mode_for_suffix (char suffix)
30726 if (TARGET_FLOAT128_TYPE)
30728 if (suffix == 'q' || suffix == 'Q')
30729 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30731 /* At the moment, we are not defining a suffix for IBM extended double.
30732 If/when the default for -mabi=ieeelongdouble is changed, and we want
30733 to support __ibm128 constants in legacy library code, we may need to
30734 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
30735 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
30736 __float80 constants. */
30742 /* Target hook for invalid_arg_for_unprototyped_fn. */
/* Returns a diagnostic string when an AltiVec vector is passed to an
   unprototyped function (except under the Darwin64 ABI), else NULL.
   The braces, one condition line (original 30747), and the trailing
   `: NULL` arm are elided from this extract.  */
30743 static const char *
30744 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
30746 return (!rs6000_darwin64_abi
30748 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
30749 && (funcdecl == NULL_TREE
30750 || (TREE_CODE (funcdecl) == FUNCTION_DECL
30751 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
30752 ? N_("AltiVec argument passed to unprototyped function")
30756 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
30757 setup by using __stack_chk_fail_local hidden function instead of
30758 calling __stack_chk_fail directly. Otherwise it is better to call
30759 __stack_chk_fail directly. */
/* (Braces elided from this extract.)  */
30761 static tree ATTRIBUTE_UNUSED
30762 rs6000_stack_protect_fail (void)
30764 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
30765 ? default_hidden_stack_protect_fail ()
30766 : default_external_stack_protect_fail ();
30769 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* AddressSanitizer shadow-memory offset: 1<<41 for 64-bit, 1<<29 for
   32-bit.  (Braces elided from this extract.)  */
30772 static unsigned HOST_WIDE_INT
30773 rs6000_asan_shadow_offset (void)
30775 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
30779 /* Mask options that we want to support inside of attribute((target)) and
30780 #pragma GCC target operations. Note, we do not include things like
30781 64/32-bit, endianness, hard/soft floating point, etc. that would have
30782 different calling sequences. */
30784 struct rs6000_opt_mask {
30785 const char *name; /* option name */
30786 HOST_WIDE_INT mask; /* mask to set */
30787 bool invert; /* invert sense of mask */
30788 bool valid_target; /* option is a target option */
/* Table of -m<name> ISA-flag options recognized inside
   attribute((target)) / #pragma GCC target.  NOTE(review): some lines
   are elided from this extract — e.g. the struct's closing `};`, the
   array's opening brace, the second line of the "efficient-unaligned-vsx"
   entry, and the `#else`/`#endif` lines of the conditional entries.  */
30791 static struct rs6000_opt_mask const rs6000_opt_masks[] =
30793 { "altivec", OPTION_MASK_ALTIVEC, false, true },
30794 { "cmpb", OPTION_MASK_CMPB, false, true },
30795 { "crypto", OPTION_MASK_CRYPTO, false, true },
30796 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
30797 { "dlmzb", OPTION_MASK_DLMZB, false, true },
30798 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
30800 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
30801 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
30802 { "fprnd", OPTION_MASK_FPRND, false, true },
30803 { "future", OPTION_MASK_FUTURE, false, true },
30804 { "hard-dfp", OPTION_MASK_DFP, false, true },
30805 { "htm", OPTION_MASK_HTM, false, true },
30806 { "isel", OPTION_MASK_ISEL, false, true },
30807 { "mfcrf", OPTION_MASK_MFCRF, false, true },
30808 { "mfpgpr", 0, false, true },
30809 { "modulo", OPTION_MASK_MODULO, false, true },
30810 { "mulhw", OPTION_MASK_MULHW, false, true },
30811 { "multiple", OPTION_MASK_MULTIPLE, false, true },
30812 { "pcrel", OPTION_MASK_PCREL, false, true },
30813 { "popcntb", OPTION_MASK_POPCNTB, false, true },
30814 { "popcntd", OPTION_MASK_POPCNTD, false, true },
30815 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
30816 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
30817 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
30818 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
30819 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
30820 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
30821 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
30822 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
30823 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
30824 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
30825 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
30826 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
30827 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
30828 { "string", 0, false, true },
30829 { "update", OPTION_MASK_NO_UPDATE, true , true },
30830 { "vsx", OPTION_MASK_VSX, false, true },
30831 #ifdef OPTION_MASK_64BIT
30833 { "aix64", OPTION_MASK_64BIT, false, false },
30834 { "aix32", OPTION_MASK_64BIT, true, false },
30836 { "64", OPTION_MASK_64BIT, false, false },
30837 { "32", OPTION_MASK_64BIT, true, false },
30840 #ifdef OPTION_MASK_EABI
30841 { "eabi", OPTION_MASK_EABI, false, false },
30843 #ifdef OPTION_MASK_LITTLE_ENDIAN
30844 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
30845 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
30847 #ifdef OPTION_MASK_RELOCATABLE
30848 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
30850 #ifdef OPTION_MASK_STRICT_ALIGN
30851 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
30853 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
30854 { "string", 0, false, false },
30857 /* Builtin mask mapping for printing the flags. */
/* Names for the RS6000_BTM_* builtin-mask bits, used only when printing
   flags (see rs6000_print_builtin_options).  The array's opening brace
   and closing `};` are elided from this extract.  */
30858 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
30860 { "altivec", RS6000_BTM_ALTIVEC, false, false },
30861 { "vsx", RS6000_BTM_VSX, false, false },
30862 { "fre", RS6000_BTM_FRE, false, false },
30863 { "fres", RS6000_BTM_FRES, false, false },
30864 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
30865 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
30866 { "popcntd", RS6000_BTM_POPCNTD, false, false },
30867 { "cell", RS6000_BTM_CELL, false, false },
30868 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
30869 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
30870 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
30871 { "crypto", RS6000_BTM_CRYPTO, false, false },
30872 { "htm", RS6000_BTM_HTM, false, false },
30873 { "hard-dfp", RS6000_BTM_DFP, false, false },
30874 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
30875 { "long-double-128", RS6000_BTM_LDBL128, false, false },
30876 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
30877 { "float128", RS6000_BTM_FLOAT128, false, false },
30878 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
30881 /* Option variables that we want to support inside attribute((target)) and
30882 #pragma GCC target operations. */
30884 struct rs6000_opt_var {
30885 const char *name; /* option name */
30886 size_t global_offset; /* offset of the option in global_options. */
30887 size_t target_offset; /* offset of the option in target options. */
/* Table mapping boolean sub-options (no ISA mask) to their offsets in
   global_options / cl_target_option.  NOTE(review): several name lines
   are elided from this extract — e.g. the entries whose offsets reference
   x_TARGET_FRIZ, x_rs6000_default_long_calls, x_TARGET_SCHED_GROUPS,
   x_TARGET_ALWAYS_HINT, x_tls_markers and x_TARGET_SCHED_PROLOG are
   missing their `{ "name",` lines, as are the struct's `};` and the
   array's braces.  */
30890 static struct rs6000_opt_var const rs6000_opt_vars[] =
30893 offsetof (struct gcc_options, x_TARGET_FRIZ),
30894 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
30895 { "avoid-indexed-addresses",
30896 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
30897 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
30899 offsetof (struct gcc_options, x_rs6000_default_long_calls),
30900 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
30901 { "optimize-swaps",
30902 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
30903 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
30904 { "allow-movmisalign",
30905 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
30906 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
30908 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
30909 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
30911 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
30912 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
30913 { "align-branch-targets",
30914 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
30915 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
30917 offsetof (struct gcc_options, x_tls_markers),
30918 offsetof (struct cl_target_option, x_tls_markers), },
30920 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30921 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30923 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30924 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30925 { "speculate-indirect-jumps",
30926 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
30927 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
30930 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
30931 parsing. Return true if there were no errors. */
/* NOTE(review): this extract elides many lines (braces, the `ret`
   declaration, strtok re-seeding of `p`, error assignments, the `r`
   pointer setup after the "no-" check, and several `else`/closing
   lines).  ARGS is either a STRING_CST of comma-separated options or a
   TREE_LIST of such strings; ATTR_P distinguishes attribute vs. pragma
   for diagnostics.  */
30934 rs6000_inner_target_options (tree args, bool attr_p)
30938 if (args == NULL_TREE)
30941 else if (TREE_CODE (args) == STRING_CST)
30943 char *p = ASTRDUP (TREE_STRING_POINTER (args));
30946 while ((q = strtok (p, ",")) != NULL)
30948 bool error_p = false;
30949 bool not_valid_p = false;
30950 const char *cpu_opt = NULL;
30953 if (strncmp (q, "cpu=", 4) == 0)
30955 int cpu_index = rs6000_cpu_name_lookup (q+4);
30956 if (cpu_index >= 0)
30957 rs6000_cpu_index = cpu_index;
30964 else if (strncmp (q, "tune=", 5) == 0)
30966 int tune_index = rs6000_cpu_name_lookup (q+5);
30967 if (tune_index >= 0)
30968 rs6000_tune_index = tune_index;
30978 bool invert = false;
30982 if (strncmp (r, "no-", 3) == 0)
30988 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
30989 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
30991 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
30993 if (!rs6000_opt_masks[i].valid_target)
30994 not_valid_p = true;
30998 rs6000_isa_flags_explicit |= mask;
31000 /* VSX needs altivec, so -mvsx automagically sets
31001 altivec and disables -mavoid-indexed-addresses. */
31004 if (mask == OPTION_MASK_VSX)
31006 mask |= OPTION_MASK_ALTIVEC;
31007 TARGET_AVOID_XFORM = 0;
31011 if (rs6000_opt_masks[i].invert)
31015 rs6000_isa_flags &= ~mask;
31017 rs6000_isa_flags |= mask;
31022 if (error_p && !not_valid_p)
31024 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
31025 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
31027 size_t j = rs6000_opt_vars[i].global_offset;
31028 *((int *) ((char *)&global_options + j)) = !invert;
31030 not_valid_p = false;
31038 const char *eprefix, *esuffix;
31043 eprefix = "__attribute__((__target__(";
31048 eprefix = "#pragma GCC target ";
31053 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
31055 else if (not_valid_p)
31056 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
31058 error ("%s%qs%s is invalid", eprefix, q, esuffix);
31063 else if (TREE_CODE (args) == TREE_LIST)
31067 tree value = TREE_VALUE (args);
31070 bool ret2 = rs6000_inner_target_options (value, attr_p);
31074 args = TREE_CHAIN (args);
31076 while (args != NULL_TREE);
31081 error ("attribute %<target%> argument not a string");
31088 /* Print out the target options as a list for -mdebug=target. */
/* Recursively prints ARGS (STRING_CST or TREE_LIST) to stderr, each
   token prefixed by PREFIX.  NOTE(review): braces, the strtok
   re-seeding of `p`, and the closing of the do/while loop are elided
   from this extract.  */
31091 rs6000_debug_target_options (tree args, const char *prefix)
31093 if (args == NULL_TREE)
31094 fprintf (stderr, "%s<NULL>", prefix);
31096 else if (TREE_CODE (args) == STRING_CST)
31098 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31101 while ((q = strtok (p, ",")) != NULL)
31104 fprintf (stderr, "%s\"%s\"", prefix, q);
31109 else if (TREE_CODE (args) == TREE_LIST)
31113 tree value = TREE_VALUE (args);
31116 rs6000_debug_target_options (value, prefix);
31119 args = TREE_CHAIN (args);
31121 while (args != NULL_TREE);
31125 gcc_unreachable ();
31131 /* Hook to validate attribute((target("..."))). */
/* NOTE(review): elided lines here include the return type, some
   parameters of the signature (args, flags), braces, the `ret` and
   `old_optimize` declarations, and the final `return ret;`.  Parses the
   attribute via rs6000_inner_target_options, re-runs the option
   override, and attaches the resulting target/optimize nodes to FNDECL,
   restoring global state afterwards.  */
31134 rs6000_valid_attribute_p (tree fndecl,
31135 tree ARG_UNUSED (name),
31139 struct cl_target_option cur_target;
31142 tree new_target, new_optimize;
31143 tree func_optimize;
31145 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31147 if (TARGET_DEBUG_TARGET)
31149 tree tname = DECL_NAME (fndecl);
31150 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
31152 fprintf (stderr, "function: %.*s\n",
31153 (int) IDENTIFIER_LENGTH (tname),
31154 IDENTIFIER_POINTER (tname));
31156 fprintf (stderr, "function: unknown\n");
31158 fprintf (stderr, "args:");
31159 rs6000_debug_target_options (args, " ");
31160 fprintf (stderr, "\n");
31163 fprintf (stderr, "flags: 0x%x\n", flags);
31165 fprintf (stderr, "--------------------\n");
31168 /* attribute((target("default"))) does nothing, beyond
31169 affecting multi-versioning. */
31170 if (TREE_VALUE (args)
31171 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
31172 && TREE_CHAIN (args) == NULL_TREE
31173 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
31176 old_optimize = build_optimization_node (&global_options);
31177 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31179 /* If the function changed the optimization levels as well as setting target
31180 options, start with the optimizations specified. */
31181 if (func_optimize && func_optimize != old_optimize)
31182 cl_optimization_restore (&global_options,
31183 TREE_OPTIMIZATION (func_optimize));
31185 /* The target attributes may also change some optimization flags, so update
31186 the optimization options if necessary. */
31187 cl_target_option_save (&cur_target, &global_options);
31188 rs6000_cpu_index = rs6000_tune_index = -1;
31189 ret = rs6000_inner_target_options (args, true);
31191 /* Set up any additional state. */
31194 ret = rs6000_option_override_internal (false);
31195 new_target = build_target_option_node (&global_options);
31200 new_optimize = build_optimization_node (&global_options);
31207 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
31209 if (old_optimize != new_optimize)
31210 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31213 cl_target_option_restore (&global_options, &cur_target);
31215 if (old_optimize != new_optimize)
31216 cl_optimization_restore (&global_options,
31217 TREE_OPTIMIZATION (old_optimize));
31223 /* Hook to validate the current #pragma GCC target and set the state, and
31224 update the macros based on what was changed. If ARGS is NULL, then
31225 POP_TARGET is used to reset the options. */
/* NOTE(review): elided lines include the return type, braces, the
   `cur_tree` declaration, the `if (args)`/`else` structure around the
   restore-vs-parse paths, part of the failure condition at original
   lines 31268-31269, and the `return false;`/`return true;` exits.  */
31228 rs6000_pragma_target_parse (tree args, tree pop_target)
31230 tree prev_tree = build_target_option_node (&global_options);
31232 struct cl_target_option *prev_opt, *cur_opt;
31233 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
31234 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
31236 if (TARGET_DEBUG_TARGET)
31238 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
31239 fprintf (stderr, "args:");
31240 rs6000_debug_target_options (args, " ");
31241 fprintf (stderr, "\n");
31245 fprintf (stderr, "pop_target:\n");
31246 debug_tree (pop_target);
31249 fprintf (stderr, "pop_target: <NULL>\n");
31251 fprintf (stderr, "--------------------\n");
31256 cur_tree = ((pop_target)
31258 : target_option_default_node);
31259 cl_target_option_restore (&global_options,
31260 TREE_TARGET_OPTION (cur_tree));
31264 rs6000_cpu_index = rs6000_tune_index = -1;
31265 if (!rs6000_inner_target_options (args, false)
31266 || !rs6000_option_override_internal (false)
31267 || (cur_tree = build_target_option_node (&global_options))
31270 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
31271 fprintf (stderr, "invalid pragma\n");
31277 target_option_current_node = cur_tree;
31278 rs6000_activate_target_options (target_option_current_node);
31280 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
31281 change the macros that are defined. */
31282 if (rs6000_target_modify_macros_ptr)
31284 prev_opt = TREE_TARGET_OPTION (prev_tree);
31285 prev_bumask = prev_opt->x_rs6000_builtin_mask;
31286 prev_flags = prev_opt->x_rs6000_isa_flags;
31288 cur_opt = TREE_TARGET_OPTION (cur_tree);
31289 cur_flags = cur_opt->x_rs6000_isa_flags;
31290 cur_bumask = cur_opt->x_rs6000_builtin_mask;
31292 diff_bumask = (prev_bumask ^ cur_bumask);
31293 diff_flags = (prev_flags ^ cur_flags);
31295 if ((diff_flags != 0) || (diff_bumask != 0))
31297 /* Delete old macros. */
31298 rs6000_target_modify_macros_ptr (false,
31299 prev_flags & diff_flags,
31300 prev_bumask & diff_bumask);
31302 /* Define new macros. */
31303 rs6000_target_modify_macros_ptr (true,
31304 cur_flags & diff_flags,
31305 cur_bumask & diff_bumask);
31313 /* Remember the last target of rs6000_set_current_function. */
31314 static GTY(()) tree rs6000_previous_fndecl;
31316 /* Restore target's globals from NEW_TREE and invalidate the
31317 rs6000_previous_fndecl cache. */
/* NOTE(review): return type, braces, and (presumably) an `else` before
   the save_target_globals_default_opts line are elided here.  */
31320 rs6000_activate_target_options (tree new_tree)
31322 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31323 if (TREE_TARGET_GLOBALS (new_tree))
31324 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31325 else if (new_tree == target_option_default_node)
31326 restore_target_globals (&default_target_globals);
31328 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31329 rs6000_previous_fndecl = NULL_TREE;
31332 /* Establish appropriate back-end context for processing the function
31333 FNDECL. The argument might be NULL to indicate processing at top
31334 level, outside of any function scope. */
/* NOTE(review): elided lines include the return type, braces, the
   `old_tree`/`new_tree` declarations, the early `return` when FNDECL is
   unchanged, and the debug-output guards around original lines
   31384-31396.  Activates FNDECL's target options only when they differ
   from the previously active node.  */
31336 rs6000_set_current_function (tree fndecl)
31338 if (TARGET_DEBUG_TARGET)
31340 fprintf (stderr, "\n==================== rs6000_set_current_function");
31343 fprintf (stderr, ", fndecl %s (%p)",
31344 (DECL_NAME (fndecl)
31345 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
31346 : "<unknown>"), (void *)fndecl);
31348 if (rs6000_previous_fndecl)
31349 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
31351 fprintf (stderr, "\n");
31354 /* Only change the context if the function changes. This hook is called
31355 several times in the course of compiling a function, and we don't want to
31356 slow things down too much or call target_reinit when it isn't safe. */
31357 if (fndecl == rs6000_previous_fndecl)
31361 if (rs6000_previous_fndecl == NULL_TREE)
31362 old_tree = target_option_current_node;
31363 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
31364 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
31366 old_tree = target_option_default_node;
31369 if (fndecl == NULL_TREE)
31371 if (old_tree != target_option_current_node)
31372 new_tree = target_option_current_node;
31374 new_tree = NULL_TREE;
31378 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31379 if (new_tree == NULL_TREE)
31380 new_tree = target_option_default_node;
31383 if (TARGET_DEBUG_TARGET)
31387 fprintf (stderr, "\nnew fndecl target specific options:\n");
31388 debug_tree (new_tree);
31393 fprintf (stderr, "\nold fndecl target specific options:\n");
31394 debug_tree (old_tree);
31397 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
31398 fprintf (stderr, "--------------------\n");
31401 if (new_tree && old_tree != new_tree)
31402 rs6000_activate_target_options (new_tree);
31405 rs6000_previous_fndecl = fndecl;
31409 /* Save the current options */
/* Copies the ISA flags (and the explicit-flag mask) from OPTS into the
   per-function save area PTR.  (Return type and braces elided.)  */
31412 rs6000_function_specific_save (struct cl_target_option *ptr,
31413 struct gcc_options *opts)
31415 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
31416 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
31419 /* Restore the current options */
/* Inverse of rs6000_function_specific_save: reinstates the saved ISA
   flags into OPTS and re-runs the option override to recompute derived
   state.  (Return type and braces elided.)  */
31422 rs6000_function_specific_restore (struct gcc_options *opts,
31423 struct cl_target_option *ptr)
31426 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
31427 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
31428 (void) rs6000_option_override_internal (false);
31431 /* Print the current options */
/* Dumps the saved ISA flags (set and explicit) to FILE at the given
   INDENT, via rs6000_print_isa_options.  (Return type, braces elided.)  */
31434 rs6000_function_specific_print (FILE *file, int indent,
31435 struct cl_target_option *ptr)
31437 rs6000_print_isa_options (file, indent, "Isa options set",
31438 ptr->x_rs6000_isa_flags);
31440 rs6000_print_isa_options (file, indent, "Isa options explicit",
31441 ptr->x_rs6000_isa_flags_explicit);
31444 /* Helper function to print the current isa or misc options on a line. */
/* NOTE(review): elided lines include the return type, braces, the
   `indent` parameter line, loop-index declarations, the early `return`
   when FLAGS is empty, and the branch structure around the
   invert/no-"no-" handling.  Prints each named mask in OPTS that is
   present in FLAGS, wrapping lines at ~120 columns.  */
31447 rs6000_print_options_internal (FILE *file,
31449 const char *string,
31450 HOST_WIDE_INT flags,
31451 const char *prefix,
31452 const struct rs6000_opt_mask *opts,
31453 size_t num_elements)
31456 size_t start_column = 0;
31458 size_t max_column = 120;
31459 size_t prefix_len = strlen (prefix);
31460 size_t comma_len = 0;
31461 const char *comma = "";
31464 start_column += fprintf (file, "%*s", indent, "");
31468 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
31472 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
31474 /* Print the various mask options. */
31475 cur_column = start_column;
31476 for (i = 0; i < num_elements; i++)
31478 bool invert = opts[i].invert;
31479 const char *name = opts[i].name;
31480 const char *no_str = "";
31481 HOST_WIDE_INT mask = opts[i].mask;
31482 size_t len = comma_len + prefix_len + strlen (name);
31486 if ((flags & mask) == 0)
31489 len += sizeof ("no-") - 1;
31497 if ((flags & mask) != 0)
31500 len += sizeof ("no-") - 1;
31507 if (cur_column > max_column)
31509 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
31510 cur_column = start_column + len;
31514 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
31516 comma_len = sizeof (", ") - 1;
31519 fputs ("\n", file);
31522 /* Helper function to print the current isa options on a line. */
/* Thin wrapper over rs6000_print_options_internal using the "-m" prefix
   and the rs6000_opt_masks table.  (Return type and braces elided.)  */
31525 rs6000_print_isa_options (FILE *file, int indent, const char *string,
31526 HOST_WIDE_INT flags)
31528 rs6000_print_options_internal (file, indent, string, flags, "-m",
31529 &rs6000_opt_masks[0],
31530 ARRAY_SIZE (rs6000_opt_masks));
/* Like rs6000_print_isa_options but for the builtin-mask bits: no
   prefix, rs6000_builtin_mask_names table.  (The leading comment,
   return type, and braces are elided from this extract.)  */
31534 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
31535 HOST_WIDE_INT flags)
31537 rs6000_print_options_internal (file, indent, string, flags, "",
31538 &rs6000_builtin_mask_names[0],
31539 ARRAY_SIZE (rs6000_builtin_mask_names));
31542 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
31543 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
31544 -mupper-regs-df, etc.).
31546 If the user used -mno-power8-vector, we need to turn off all of the implicit
31547 ISA 2.07 and 3.0 options that relate to the vector unit.
31549 If the user used -mno-power9-vector, we need to turn off all of the implicit
31550 ISA 3.0 options that relate to the vector unit.
31552 This function does not handle explicit options such as the user specifying
31553 -mdirect-move. These are handled in rs6000_option_override_internal, and
31554 the appropriate error is given if needed.
31556 We return a mask of all of the implicit options that should not be enabled
/* NOTE(review): elided lines include the tail of the comment above,
   braces, loop-index declarations, part of the `set_flags` computation
   (original lines 31585-31586), and the error-reporting guard around
   the inner loop.  */
31559 static HOST_WIDE_INT
31560 rs6000_disable_incompatible_switches (void)
31562 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
31565 static const struct {
31566 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
31567 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
31568 const char *const name; /* name of the switch. */
31570 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
31571 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
31572 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
31573 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
31576 for (i = 0; i < ARRAY_SIZE (flags); i++)
31578 HOST_WIDE_INT no_flag = flags[i].no_flag;
31580 if ((rs6000_isa_flags & no_flag) == 0
31581 && (rs6000_isa_flags_explicit & no_flag) != 0)
31583 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
31584 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
31590 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
31591 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
31593 set_flags &= ~rs6000_opt_masks[j].mask;
31594 error ("%<-mno-%s%> turns off %<-m%s%>",
31596 rs6000_opt_masks[j].name);
31599 gcc_assert (!set_flags);
31602 rs6000_isa_flags &= ~dep_flags;
31603 ignore_masks |= no_flag | dep_flags;
31607 return ignore_masks;
31611 /* Helper function for printing the function name when debugging. */
/* Returns FN's identifier string, or "<no-name>" when it has none.
   NOTE(review): braces and the null-FN early return (original lines
   ~31616-31619) are elided from this extract.  */
31613 static const char *
31614 get_decl_name (tree fn)
31621 name = DECL_NAME (fn);
31623 return "<no-name>";
31625 return IDENTIFIER_POINTER (name);
31628 /* Return the clone id of the target we are compiling code for in a target
31629 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
31630 the priority list for the target clones (ordered from lowest to
/* NOTE(review): elided lines include the end of the comment above, the
   return type, braces, the guard around reading the "target" attribute
   (original ~31641), the `break` of the priority loop, and the final
   `return ret;`.  Scans rs6000_clone_map from highest priority down for
   the first entry whose ISA mask matches the function's flags.  */
31634 rs6000_clone_priority (tree fndecl)
31636 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31637 HOST_WIDE_INT isa_masks;
31638 int ret = CLONE_DEFAULT;
31639 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
31640 const char *attrs_str = NULL;
31642 attrs = TREE_VALUE (TREE_VALUE (attrs));
31643 attrs_str = TREE_STRING_POINTER (attrs);
31645 /* Return priority zero for default function. Return the ISA needed for the
31646 function if it is not the default. */
31647 if (strcmp (attrs_str, "default") != 0)
31649 if (fn_opts == NULL_TREE)
31650 fn_opts = target_option_default_node;
31652 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
31653 isa_masks = rs6000_isa_flags;
31655 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
31657 for (ret = CLONE_MAX - 1; ret != 0; ret--)
31658 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
31662 if (TARGET_DEBUG_TARGET)
31663 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
31664 get_decl_name (fndecl), ret);
31669 /* This compares the priority of target features in function DECL1 and DECL2.
31670 It returns positive value if DECL1 is higher priority, negative value if
31671 DECL2 is higher priority and 0 if they are the same. Note, priorities are
31672 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
31675 rs6000_compare_version_priority (tree decl1, tree decl2)
31677 int priority1 = rs6000_clone_priority (decl1);
31678 int priority2 = rs6000_clone_priority (decl2);
/* Priorities are small non-negative clone ids, so the subtraction cannot
   overflow and its sign gives the comparison result directly.  */
31679 int ret = priority1 - priority2;
31681 if (TARGET_DEBUG_TARGET)
31682 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
31683 get_decl_name (decl1), get_decl_name (decl2), ret);
31688 /* Make a dispatcher declaration for the multi-versioned function DECL.
31689 Calls to DECL function will be replaced with calls to the dispatcher
31690 by the front-end. Returns the decl of the dispatcher function. */
31693 rs6000_get_function_versions_dispatcher (void *decl)
31695 tree fn = (tree) decl;
31696 struct cgraph_node *node = NULL;
31697 struct cgraph_node *default_node = NULL;
31698 struct cgraph_function_version_info *node_v = NULL;
31699 struct cgraph_function_version_info *first_v = NULL;
31701 tree dispatch_decl = NULL;
31703 struct cgraph_function_version_info *default_version_info = NULL;
31704 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31706 if (TARGET_DEBUG_TARGET)
31707 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
31708 get_decl_name (fn));
31710 node = cgraph_node::get (fn);
31711 gcc_assert (node != NULL);
31713 node_v = node->function_version ();
31714 gcc_assert (node_v != NULL);
/* If a dispatcher was already created for this version set, reuse it.  */
31716 if (node_v->dispatcher_resolver != NULL)
31717 return node_v->dispatcher_resolver;
31719 /* Find the default version and make it the first node. */
31721 /* Go to the beginning of the chain. */
31722 while (first_v->prev != NULL)
31723 first_v = first_v->prev;
31725 default_version_info = first_v;
/* Walk the version chain looking for the "default" version.  */
31726 while (default_version_info != NULL)
31728 const tree decl2 = default_version_info->this_node->decl;
31729 if (is_function_default_version (decl2))
31731 default_version_info = default_version_info->next;
31734 /* If there is no default node, just return NULL. */
31735 if (default_version_info == NULL)
31738 /* Make default info the first node. */
/* Unlink the default version from its current position and splice it in
   at the head of the doubly-linked chain.  */
31739 if (first_v != default_version_info)
31741 default_version_info->prev->next = default_version_info->next;
31742 if (default_version_info->next)
31743 default_version_info->next->prev = default_version_info->prev;
31744 first_v->prev = default_version_info;
31745 default_version_info->next = first_v;
31746 default_version_info->prev = NULL;
31749 default_node = default_version_info->this_node;
/* Dispatching relies on __builtin_cpu_supports reading HWCAP bits that
   glibc 2.23+ stores in the TCB; without that support the feature cannot
   work, so diagnose it.  */
31751 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
31752 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31753 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
31754 "exports hardware capability bits");
31757 if (targetm.has_ifunc_p ())
31759 struct cgraph_function_version_info *it_v = NULL;
31760 struct cgraph_node *dispatcher_node = NULL;
31761 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31763 /* Right now, the dispatching is done via ifunc. */
31764 dispatch_decl = make_dispatcher_decl (default_node->decl);
31766 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31767 gcc_assert (dispatcher_node != NULL);
31768 dispatcher_node->dispatcher_function = 1;
31769 dispatcher_version_info
31770 = dispatcher_node->insert_new_function_version ();
31771 dispatcher_version_info->next = default_version_info;
31772 dispatcher_node->definition = 1;
31774 /* Set the dispatcher for all the versions. */
31775 it_v = default_version_info;
31776 while (it_v != NULL)
31778 it_v->dispatcher_resolver = dispatch_decl;
/* No ifunc support: multiversioning cannot be implemented, so report an
   error at the default version's location.  */
31784 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31785 "multiversioning needs ifunc which is not supported "
31790 return dispatch_decl;
31793 /* Make the resolver function decl to dispatch the versions of a multi-
31794 versioned function, DEFAULT_DECL. Create an empty basic block in the
31795 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
31799 make_resolver_func (const tree default_decl,
31800 const tree dispatch_decl,
31801 basic_block *empty_bb)
31803 /* Make the resolver function static. The resolver function returns
31805 tree decl_name = clone_function_name (default_decl, "resolver");
31806 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
/* The resolver takes no arguments and returns a pointer to the chosen
   function version.  */
31807 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
31808 tree decl = build_fn_decl (resolver_name, type);
31809 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31811 DECL_NAME (decl) = decl_name;
31812 TREE_USED (decl) = 1;
31813 DECL_ARTIFICIAL (decl) = 1;
31814 DECL_IGNORED_P (decl) = 0;
31815 TREE_PUBLIC (decl) = 0;
31816 DECL_UNINLINABLE (decl) = 1;
31818 /* Resolver is not external, body is generated. */
31819 DECL_EXTERNAL (decl) = 0;
31820 DECL_EXTERNAL (dispatch_decl) = 0;
31822 DECL_CONTEXT (decl) = NULL_TREE;
31823 DECL_INITIAL (decl) = make_node (BLOCK);
31824 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31826 /* Build result decl and add to function_decl. */
31827 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31828 DECL_CONTEXT (t) = decl;
31829 DECL_ARTIFICIAL (t) = 1;
31830 DECL_IGNORED_P (t) = 1;
31831 DECL_RESULT (decl) = t;
/* Lower the empty resolver to GIMPLE and give it an initial empty basic
   block that the caller will fill with dispatch conditions.  */
31833 gimplify_function_tree (decl);
31834 push_cfun (DECL_STRUCT_FUNCTION (decl));
31835 *empty_bb = init_lowered_empty_function (decl, false,
31836 profile_count::uninitialized ());
31838 cgraph_node::add_new_function (decl, true);
31839 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31843 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
31844 DECL_ATTRIBUTES (dispatch_decl)
31845 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
31847 cgraph_node::create_same_body_alias (dispatch_decl, decl);
31852 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
31853 return a pointer to VERSION_DECL if we are running on a machine that
31854 supports the index CLONE_ISA hardware architecture bits. This function will
31855 be called during version dispatch to decide which function version to
31856 execute. It returns the basic block at the end, to which more conditions
31860 add_condition_to_bb (tree function_decl, tree version_decl,
31861 int clone_isa, basic_block new_bb)
31863 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31865 gcc_assert (new_bb != NULL);
31866 gimple_seq gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" — shared by both the
   unconditional (default) and conditional paths below.  */
31869 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31870 build_fold_addr_expr (version_decl));
31871 tree result_var = create_tmp_var (ptr_type_node);
31872 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
31873 gimple *return_stmt = gimple_build_return (result_var);
/* The default clone needs no runtime test: just return its address.  */
31875 if (clone_isa == CLONE_DEFAULT)
31877 gimple_seq_add_stmt (&gseq, convert_stmt);
31878 gimple_seq_add_stmt (&gseq, return_stmt);
31879 set_bb_seq (new_bb, gseq);
31880 gimple_set_bb (convert_stmt, new_bb);
31881 gimple_set_bb (return_stmt, new_bb);
/* Non-default clone: emit "if (__builtin_cpu_supports ("name")) return
   &version;" using the clone's feature-name string as the argument.  */
31886 tree bool_zero = build_int_cst (bool_int_type_node, 0);
31887 tree cond_var = create_tmp_var (bool_int_type_node);
31888 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
31889 const char *arg_str = rs6000_clone_map[clone_isa].name;
31890 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31891 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31892 gimple_call_set_lhs (call_cond_stmt, cond_var);
31894 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31895 gimple_set_bb (call_cond_stmt, new_bb);
31896 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31898 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
31899 NULL_TREE, NULL_TREE);
31900 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31901 gimple_set_bb (if_else_stmt, new_bb);
31902 gimple_seq_add_stmt (&gseq, if_else_stmt);
31904 gimple_seq_add_stmt (&gseq, convert_stmt);
31905 gimple_seq_add_stmt (&gseq, return_stmt);
31906 set_bb_seq (new_bb, gseq);
/* Split the block after the condition: bb1 ends in the COND, bb2 holds
   the convert+return (true path), bb3 receives the false edge and will
   take the next condition.  */
31908 basic_block bb1 = new_bb;
31909 edge e12 = split_block (bb1, if_else_stmt);
31910 basic_block bb2 = e12->dest;
31911 e12->flags &= ~EDGE_FALLTHRU;
31912 e12->flags |= EDGE_TRUE_VALUE;
31914 edge e23 = split_block (bb2, return_stmt);
31915 gimple_set_bb (convert_stmt, bb2);
31916 gimple_set_bb (return_stmt, bb2);
31918 basic_block bb3 = e23->dest;
31919 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31922 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31928 /* This function generates the dispatch function for multi-versioned functions.
31929 DISPATCH_DECL is the function which will contain the dispatch logic.
31930 FNDECLS are the function choices for dispatch, and is a tree chain.
31931 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
31932 code is generated. */
31935 dispatch_function_versions (tree dispatch_decl,
31937 basic_block *empty_bb)
31941 vec<tree> *fndecls;
/* One slot per clone id; unfilled slots stay NULL and are skipped when
   emitting conditions.  */
31942 tree clones[CLONE_MAX];
31944 if (TARGET_DEBUG_TARGET)
31945 fputs ("dispatch_function_versions, top\n", stderr);
31947 gcc_assert (dispatch_decl != NULL
31948 && fndecls_p != NULL
31949 && empty_bb != NULL);
31951 /* fndecls_p is actually a vector. */
31952 fndecls = static_cast<vec<tree> *> (fndecls_p);
31954 /* At least one more version other than the default. */
31955 gcc_assert (fndecls->length () >= 2);
31957 /* The first version in the vector is the default decl. */
31958 memset ((void *) clones, '\0', sizeof (clones));
31959 clones[CLONE_DEFAULT] = (*fndecls)[0];
31961 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
31962 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
31963 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
31964 recent glibc. If we ever need to call __builtin_cpu_init, we would need
31965 to insert the code here to do the call. */
/* Bucket each non-default version by its clone priority; first decl for
   a given priority wins.  */
31967 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31969 int priority = rs6000_clone_priority (ele);
31970 if (!clones[priority])
31971 clones[priority] = ele;
/* Emit the dispatch conditions from highest priority down to the
   default, chaining each condition onto the block the previous one
   returned.  */
31974 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
31977 if (TARGET_DEBUG_TARGET)
31978 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
31979 ix, get_decl_name (clones[ix]));
31981 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
31988 /* Generate the dispatching code body to dispatch multi-versioned function
31989 DECL. The target hook is called to process the "target" attributes and
31990 provide the code to dispatch the right function at run-time. NODE points
31991 to the dispatcher decl whose body will be created. */
31994 rs6000_generate_version_dispatcher_body (void *node_p)
31997 basic_block empty_bb;
31998 struct cgraph_node *node = (cgraph_node *) node_p;
31999 struct cgraph_function_version_info *ninfo = node->function_version ();
/* Already generated for this version set — return the cached resolver.  */
32001 if (ninfo->dispatcher_resolver)
32002 return ninfo->dispatcher_resolver;
32004 /* node is going to be an alias, so remove the finalized bit. */
32005 node->definition = false;
32007 /* The first version in the chain corresponds to the default version. */
32008 ninfo->dispatcher_resolver = resolver
32009 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
32011 if (TARGET_DEBUG_TARGET)
32012 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
32013 get_decl_name (resolver));
32015 push_cfun (DECL_STRUCT_FUNCTION (resolver));
32016 auto_vec<tree, 2> fn_ver_vec;
/* Collect every function version (the chain after the dispatcher's own
   info node) for dispatch generation.  */
32018 for (struct cgraph_function_version_info *vinfo = ninfo->next;
32020 vinfo = vinfo->next)
32022 struct cgraph_node *version = vinfo->this_node;
32023 /* Check for virtual functions here again, as by this time it should
32024 have been determined if this function needs a vtable index or
32025 not. This happens for methods in derived classes that override
32026 virtual methods in base classes but are not explicitly marked as
32028 if (DECL_VINDEX (version->decl))
32029 sorry ("Virtual function multiversioning not supported");
32031 fn_ver_vec.safe_push (version->decl);
32034 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
/* The resolver body was built directly as GIMPLE/CFG, so recompute the
   callgraph edges for it.  */
32035 cgraph_edge::rebuild_edges ();
32041 /* Hook to determine if one function can safely inline another. */
32044 rs6000_can_inline_p (tree caller, tree callee)
32047 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32048 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32050 /* If callee has no option attributes, then it is ok to inline. */
32054 /* If caller has no option attributes, but callee does then it is not ok to
32056 else if (!caller_tree)
32061 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32062 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32064 /* Callee's options should a subset of the caller's, i.e. a vsx function
32065 can inline an altivec function but a non-vsx function can't inline a
/* Subset test: every ISA flag the callee requires must also be set for
   the caller.  */
32067 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32068 == callee_opts->x_rs6000_isa_flags)
32072 if (TARGET_DEBUG_TARGET)
32073 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32074 get_decl_name (caller), get_decl_name (callee),
32075 (ret ? "can" : "cannot"));
32080 /* Allocate a stack temp and fixup the address so it meets the particular
32081 memory requirements (either offetable or REG+REG addressing). */
32084 rs6000_allocate_stack_temp (machine_mode mode,
32085 bool offsettable_p,
32088 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32089 rtx addr = XEXP (stack, 0);
/* After reload we must honor strict addressing rules.  */
32090 int strict_p = reload_completed;
/* A plain (reg) address satisfies every requirement; otherwise force the
   address into a register when the requested form is not met.  */
32092 if (!legitimate_indirect_address_p (addr, strict_p))
32095 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32096 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32098 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32099 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32105 /* Given a memory reference, if it is not a reg or reg+reg addressing,
32106 convert to such a form to deal with memory reference instructions
32107 like STFIWX and LDBRX that only take reg+reg addressing. */
32110 rs6000_force_indexed_or_indirect_mem (rtx x)
32112 machine_mode mode = GET_MODE (x);
32114 gcc_assert (MEM_P (x));
32115 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
32117 rtx addr = XEXP (x, 0);
/* Auto-increment forms: perform the register update explicitly up
   front, then address through the plain base register.  */
32118 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32120 rtx reg = XEXP (addr, 0);
32121 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32122 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32123 gcc_assert (REG_P (reg));
32124 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32127 else if (GET_CODE (addr) == PRE_MODIFY)
32129 rtx reg = XEXP (addr, 0);
32130 rtx expr = XEXP (addr, 1);
32131 gcc_assert (REG_P (reg));
32132 gcc_assert (GET_CODE (expr) == PLUS);
32133 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
/* Turn base+offset into reg+reg by forcing both operands into
   registers; anything else becomes a single-register address.  */
32137 if (GET_CODE (addr) == PLUS)
32139 rtx op0 = XEXP (addr, 0);
32140 rtx op1 = XEXP (addr, 1);
32141 op0 = force_reg (Pmode, op0);
32142 op1 = force_reg (Pmode, op1);
32143 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
32146 x = replace_equiv_address (x, force_reg (Pmode, addr));
32152 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32154 On the RS/6000, all integer constants are acceptable, most won't be valid
32155 for particular insns, though. Only easy FP constants are acceptable. */
32158 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
/* TLS symbol references must go through the TLS access sequences and so
   are never legitimate constants on ELF.  */
32160 if (TARGET_ELF && tls_referenced_p (x))
32163 if (CONST_DOUBLE_P (x))
32164 return easy_fp_constant (x, mode);
32166 if (GET_CODE (x) == CONST_VECTOR)
32167 return easy_vector_constant (x, mode);
32173 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32176 chain_already_loaded (rtx_insn *last)
/* Scan backwards through the already-emitted insns looking for a SET
   whose destination is the static-chain register (r11).  */
32178 for (; last != NULL; last = PREV_INSN (last))
32180 if (NONJUMP_INSN_P (last))
32182 rtx patt = PATTERN (last);
32184 if (GET_CODE (patt) == SET)
32186 rtx lhs = XEXP (patt, 0);
32188 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32196 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32199 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32201 rtx func = func_desc;
32202 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32203 rtx toc_load = NULL_RTX;
32204 rtx toc_restore = NULL_RTX;
32206 rtx abi_reg = NULL_RTX;
32210 bool is_pltseq_longcall;
32213 tlsarg = global_tlsarg;
32215 /* Handle longcall attributes. */
32216 is_pltseq_longcall = false;
32217 if ((INTVAL (cookie) & CALL_LONG) != 0
32218 && GET_CODE (func_desc) == SYMBOL_REF)
32220 func = rs6000_longcall_ref (func_desc, tlsarg);
32222 is_pltseq_longcall = true;
32225 /* Handle indirect calls. */
32226 if (!SYMBOL_REF_P (func)
32227 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
32229 if (!rs6000_pcrel_p (cfun))
32231 /* Save the TOC into its reserved slot before the call,
32232 and prepare to restore it after the call. */
32233 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32234 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32235 gen_rtvec (1, stack_toc_offset),
32237 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
32239 /* Can we optimize saving the TOC in the prologue or
32240 do we need to do it at every call? */
32241 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32242 cfun->machine->save_toc_in_prologue = true;
32245 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32246 rtx stack_toc_mem = gen_frame_mem (Pmode,
32247 gen_rtx_PLUS (Pmode, stack_ptr,
32248 stack_toc_offset));
/* Volatile so the TOC save is never deleted or moved across the
   call by later RTL passes.  */
32249 MEM_VOLATILE_P (stack_toc_mem) = 1;
32250 if (is_pltseq_longcall)
/* Wrap the TOC save in a PLTSEQ unspec so the linker can
   recognize and edit the inline PLT call sequence.  */
32252 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
32253 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32254 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
32257 emit_move_insn (stack_toc_mem, toc_reg);
32261 if (DEFAULT_ABI == ABI_ELFv2)
32263 /* A function pointer in the ELFv2 ABI is just a plain address, but
32264 the ABI requires it to be loaded into r12 before the call. */
32265 func_addr = gen_rtx_REG (Pmode, 12);
32266 if (!rtx_equal_p (func_addr, func))
32267 emit_move_insn (func_addr, func);
32268 abi_reg = func_addr;
32269 /* Indirect calls via CTR are strongly preferred over indirect
32270 calls via LR, so move the address there. Needed to mark
32271 this insn for linker plt sequence editing too. */
32272 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32273 if (is_pltseq_longcall)
32275 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
32276 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32277 emit_insn (gen_rtx_SET (func_addr, mark_func));
32278 v = gen_rtvec (2, func_addr, func_desc);
32279 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32282 emit_move_insn (func_addr, abi_reg);
32286 /* A function pointer under AIX is a pointer to a data area whose
32287 first word contains the actual address of the function, whose
32288 second word contains a pointer to its TOC, and whose third word
32289 contains a value to place in the static chain register (r11).
32290 Note that if we load the static chain, our "trampoline" need
32291 not have any executable code. */
32293 /* Load up address of the actual function. */
32294 func = force_reg (Pmode, func);
32295 func_addr = gen_reg_rtx (Pmode);
32296 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
32298 /* Indirect calls via CTR are strongly preferred over indirect
32299 calls via LR, so move the address there. */
32300 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
32301 emit_move_insn (ctr_reg, func_addr);
32302 func_addr = ctr_reg;
32304 /* Prepare to load the TOC of the called function. Note that the
32305 TOC load must happen immediately before the actual call so
32306 that unwinding the TOC registers works correctly. See the
32307 comment in frob_update_context. */
32308 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32309 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32310 gen_rtx_PLUS (Pmode, func,
32312 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32314 /* If we have a static chain, load it up. But, if the call was
32315 originally direct, the 3rd word has not been written since no
32316 trampoline has been built, so we ought not to load it, lest we
32317 override a static chain value. */
32318 if (!(GET_CODE (func_desc) == SYMBOL_REF
32319 && SYMBOL_REF_FUNCTION_P (func_desc))
32320 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
32321 && !chain_already_loaded (get_current_sequence ()->next->last))
32323 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32324 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32325 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32326 gen_rtx_PLUS (Pmode, func,
32328 emit_move_insn (sc_reg, func_sc_mem);
32335 /* No TOC register needed for calls from PC-relative callers. */
32336 if (!rs6000_pcrel_p (cfun))
32337 /* Direct calls use the TOC: for local calls, the callee will
32338 assume the TOC register is set; for non-local calls, the
32339 PLT stub needs the TOC register. */
32344 /* Create the call. */
32345 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32346 if (value != NULL_RTX)
32347 call[0] = gen_rtx_SET (value, call[0]);
/* Attach the optional TOC load, TOC restore and the LR clobber to the
   call PARALLEL so the dependencies are visible to the RTL passes.  */
32351 call[n_call++] = toc_load;
32353 call[n_call++] = toc_restore;
32355 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32357 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32358 insn = emit_call_insn (insn);
32360 /* Mention all registers defined by the ABI to hold information
32361 as uses in CALL_INSN_FUNCTION_USAGE. */
32363 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32366 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32369 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
/* Sibcalls never carry longcall or other cookie flags here.  */
32374 gcc_assert (INTVAL (cookie) == 0);
32377 tlsarg = global_tlsarg;
32379 /* Create the call. */
32380 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
32381 if (value != NULL_RTX)
32382 call[0] = gen_rtx_SET (value, call[0]);
/* A sibling call returns directly to our caller.  */
32384 call[1] = simple_return_rtx;
32386 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32387 insn = emit_call_insn (insn);
32389 /* Note use of the TOC register. */
32390 if (!rs6000_pcrel_p (cfun))
32391 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
32392 gen_rtx_REG (Pmode, TOC_REGNUM));
32395 /* Expand code to perform a call under the SYSV4 ABI. */
32398 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32400 rtx func = func_desc;
32404 rtx abi_reg = NULL_RTX;
32408 tlsarg = global_tlsarg;
32410 /* Handle longcall attributes. */
32411 if ((INTVAL (cookie) & CALL_LONG) != 0
32412 && GET_CODE (func_desc) == SYMBOL_REF)
32414 func = rs6000_longcall_ref (func_desc, tlsarg);
32415 /* If the longcall was implemented as an inline PLT call using
32416 PLT unspecs then func will be REG:r11. If not, func will be
32417 a pseudo reg. The inline PLT call sequence supports lazy
32418 linking (and longcalls to functions in dlopen'd libraries).
32419 The other style of longcalls don't. The lazy linking entry
32420 to the dynamic symbol resolver requires r11 be the function
32421 address (as it is for linker generated PLT stubs). Ensure
32422 r11 stays valid to the bctrl by marking r11 used by the call. */
32427 /* Handle indirect calls. */
32428 if (GET_CODE (func) != SYMBOL_REF)
32430 func = force_reg (Pmode, func);
32432 /* Indirect calls via CTR are strongly preferred over indirect
32433 calls via LR, so move the address there. That can't be left
32434 to reload because we want to mark every instruction in an
32435 inline PLT call sequence with a reloc, enabling the linker to
32436 edit the sequence back to a direct call when that makes sense. */
32437 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
/* Inline PLT sequence: tag the CTR load with PLTSEQ unspecs so the
   linker can identify every insn of the sequence.  */
32440 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32441 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32442 emit_insn (gen_rtx_SET (func_addr, mark_func));
32443 v = gen_rtvec (2, func_addr, func_desc);
32444 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32447 emit_move_insn (func_addr, func);
32452 /* Create the call. */
32453 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32454 if (value != NULL_RTX)
32455 call[0] = gen_rtx_SET (value, call[0]);
/* The cookie carries the ABI call flags; keep it live via a USE.  */
32457 call[1] = gen_rtx_USE (VOIDmode, cookie);
/* Secure-PLT non-local calls need the PIC register at the call.  */
32459 if (TARGET_SECURE_PLT
32461 && GET_CODE (func_addr) == SYMBOL_REF
32462 && !SYMBOL_REF_LOCAL_P (func_addr))
32463 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
32465 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32467 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
32468 insn = emit_call_insn (insn);
32470 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32473 /* Expand code to perform a sibling call under the SysV4 ABI. */
32476 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32478 rtx func = func_desc;
32482 rtx abi_reg = NULL_RTX;
32485 tlsarg = global_tlsarg;
32487 /* Handle longcall attributes. */
32488 if ((INTVAL (cookie) & CALL_LONG) != 0
32489 && GET_CODE (func_desc) == SYMBOL_REF)
32491 func = rs6000_longcall_ref (func_desc, tlsarg);
32492 /* If the longcall was implemented as an inline PLT call using
32493 PLT unspecs then func will be REG:r11. If not, func will be
32494 a pseudo reg. The inline PLT call sequence supports lazy
32495 linking (and longcalls to functions in dlopen'd libraries).
32496 The other style of longcalls don't. The lazy linking entry
32497 to the dynamic symbol resolver requires r11 be the function
32498 address (as it is for linker generated PLT stubs). Ensure
32499 r11 stays valid to the bctr by marking r11 used by the call. */
32504 /* Handle indirect calls. */
32505 if (GET_CODE (func) != SYMBOL_REF)
32507 func = force_reg (Pmode, func);
32509 /* Indirect sibcalls must go via CTR. That can't be left to
32510 reload because we want to mark every instruction in an inline
32511 PLT call sequence with a reloc, enabling the linker to edit
32512 the sequence back to a direct call when that makes sense. */
32513 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
/* Inline PLT sequence: tag the CTR load with PLTSEQ unspecs so the
   linker can identify every insn of the sequence.  */
32516 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32517 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32518 emit_insn (gen_rtx_SET (func_addr, mark_func));
32519 v = gen_rtvec (2, func_addr, func_desc);
32520 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32523 emit_move_insn (func_addr, func);
32528 /* Create the call. */
32529 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32530 if (value != NULL_RTX)
32531 call[0] = gen_rtx_SET (value, call[0]);
/* Keep the cookie flags live and mark the sibcall's direct return.  */
32533 call[1] = gen_rtx_USE (VOIDmode, cookie);
32534 call[2] = simple_return_rtx;
32536 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32537 insn = emit_call_insn (insn);
32539 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32544 /* Expand code to perform a call under the Darwin ABI.
32545 Modulo handling of mlongcall, this is much the same as sysv.
32546 if/when the longcall optimisation is removed, we could drop this
32547 code and use the sysv case (taking care to avoid the tls stuff).
32549 We can use this for sibcalls too, if needed. */
32552 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
32553 rtx cookie, bool sibcall)
32555 rtx func = func_desc;
32559 int cookie_val = INTVAL (cookie);
32560 bool make_island = false;
32562 /* Handle longcall attributes, there are two cases for Darwin:
32563 1) Newer linkers are capable of synthesising any branch islands needed.
32564 2) We need a helper branch island synthesised by the compiler.
32565 The second case has mostly been retired and we don't use it for m64.
32566 In fact, it's is an optimisation, we could just indirect as sysv does..
32567 ... however, backwards compatibility for now.
32568 If we're going to use this, then we need to keep the CALL_LONG bit set,
32569 so that we can pick up the special insn form later. */
32570 if ((cookie_val & CALL_LONG) != 0
32571 && GET_CODE (func_desc) == SYMBOL_REF)
32573 /* FIXME: the longcall opt should not hang off picsymbol stubs. */
32574 if (darwin_picsymbol_stubs && TARGET_32BIT)
32575 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
32578 /* The linker is capable of doing this, but the user explicitly
32579 asked for -mlongcall, so we'll do the 'normal' version. */
32580 func = rs6000_longcall_ref (func_desc, NULL_RTX);
32581 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
32585 /* Handle indirect calls. */
32586 if (GET_CODE (func) != SYMBOL_REF)
32588 func = force_reg (Pmode, func);
32590 /* Indirect calls via CTR are strongly preferred over indirect
32591 calls via LR, and are required for indirect sibcalls, so move
32592 the address there. */
32593 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32594 emit_move_insn (func_addr, func);
32599 /* Create the call. */
32600 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32601 if (value != NULL_RTX)
32602 call[0] = gen_rtx_SET (value, call[0]);
/* The (possibly adjusted) cookie flags stay visible via a USE.  */
32604 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
/* Sibcall: direct return; regular call: LR is clobbered.  */
32607 call[2] = simple_return_rtx;
32609 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32611 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32612 insn = emit_call_insn (insn);
32613 /* Now we have the debug info in the insn, we can set up the branch island
32614 if we're using one. */
32617 tree funname = get_identifier (XSTR (func_desc, 0));
32619 if (no_previous_def (funname))
/* Create a fresh internal label for the island and register the
   (label, function) pair for later emission.  */
32621 rtx label_rtx = gen_label_rtx ();
32622 char *label_buf, temp_buf[256];
32623 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32624 CODE_LABEL_NUMBER (label_rtx));
/* Skip the '*' prefix some label syntaxes prepend.  */
32625 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32626 tree labelname = get_identifier (label_buf);
32627 add_compiler_branch_island (labelname, funname,
32628 insn_line ((const rtx_insn*)insn));
/* Expand a normal (non-sibling) call for Darwin; thin wrapper around
   rs6000_call_darwin_1 with SIBCALL == false.  */
32635 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32636 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32639 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
/* Expand a sibling call for Darwin; thin wrapper around
   rs6000_call_darwin_1 with SIBCALL == true.  */
32647 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32648 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32651 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
32657 /* Return whether we should generate PC-relative code for FNDECL. */
32659 rs6000_fndecl_pcrel_p (const_tree fndecl)
/* PC-relative addressing is only defined for the ELFv2 ABI.  */
32661 if (DEFAULT_ABI != ABI_ELFv2)
32664 struct cl_target_option *opts = target_opts_for_fn (fndecl);
/* Requires both -mpcrel in the function's ISA flags and the medium
   code model.  */
32666 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32667 && TARGET_CMODEL == CMODEL_MEDIUM);
32670 /* Return whether we should generate PC-relative code for *FN. */
32672 rs6000_pcrel_p (struct function *fn)
32674 if (DEFAULT_ABI != ABI_ELFv2)
32677 /* Optimize usual case. */
/* Fast path uses the global flags; only fall back to the per-function
   target options when needed.  */
32679 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32680 && TARGET_CMODEL == CMODEL_MEDIUM);
32682 return rs6000_fndecl_pcrel_p (fn->decl);
32685 #ifdef HAVE_GAS_HIDDEN
32686 # define USE_HIDDEN_LINKONCE 1
32688 # define USE_HIDDEN_LINKONCE 0
32691 /* Fills in the label name that should be used for a 476 link stack thunk. */
32694 get_ppc476_thunk_name (char name[32])
32696 gcc_assert (TARGET_LINK_STACK);
/* With hidden-linkonce support use one shared global name; otherwise
   generate a per-file internal label.  */
32698 if (USE_HIDDEN_LINKONCE)
32699 sprintf (name, "__ppc476.get_thunk")
32701 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32704 /* This function emits the simple thunk routine that is used to preserve
32705 the link stack on the 476 cpu. */
32707 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32709 rs6000_code_end (void)
32714 if (!TARGET_LINK_STACK)
32717 get_ppc476_thunk_name (name);
/* Build a minimal void-returning FUNCTION_DECL for the thunk body.  */
32719 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32720 build_function_type_list (void_type_node, NULL_TREE));
32721 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32722 NULL_TREE, void_type_node);
32723 TREE_PUBLIC (decl) = 1;
32724 TREE_STATIC (decl) = 1;
/* Emit the thunk as a hidden weak comdat so multiple TUs share one
   copy; otherwise fall back to a plain label in the text section.  */
32727 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
32729 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
32730 targetm.asm_out.unique_section (decl, 0);
32731 switch_to_section (get_named_section (decl, NULL, 0));
32732 DECL_WEAK (decl) = 1;
32733 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32734 targetm.asm_out.globalize_label (asm_out_file, name);
32735 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32736 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32741 switch_to_section (text_section);
32742 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Set up a minimal function context so final_start_function can emit
   unwind info for the hand-written body.  */
32745 DECL_INITIAL (decl) = make_node (BLOCK);
32746 current_function_decl = decl;
32747 allocate_struct_function (decl, false);
32748 init_function_start (decl);
32749 first_function_block_is_cold = false;
32750 /* Make sure unwind info is emitted for the thunk if needed. */
32751 final_start_function (emit_barrier (), asm_out_file, 1);
32753 fputs ("\tblr\n", asm_out_file);
32755 final_end_function ();
32756 init_insn_lengths ();
32757 free_after_compilation (cfun);
32759 current_function_decl = NULL;
32762 /* Add r30 to hard reg set if the prologue sets it up and it is not
32763 pic_offset_table_rtx. */
32766 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32768 if (!TARGET_SINGLE_PIC_BASE
32770 && TARGET_MINIMAL_TOC
32771 && !constant_pool_empty_p ())
32772 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32773 if (cfun->machine->split_stack_argp_used)
32774 add_to_hard_reg_set (&set->set, Pmode, 12);
32776 /* Make sure the hard reg set doesn't include r2, which was possibly added
32777 via PIC_OFFSET_TABLE_REGNUM. */
32779 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
32783 /* Helper function for rs6000_split_logical to emit a logical instruction after
32784 spliting the operation to single GPR registers.
32786 DEST is the destination register.
32787 OP1 and OP2 are the input source registers.
32788 CODE is the base operation (AND, IOR, XOR, NOT).
32789 MODE is the machine mode.
32790 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32791 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32792 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32795 rs6000_split_logical_inner (rtx dest,
32798 enum rtx_code code,
32800 bool complement_final_p,
32801 bool complement_op1_p,
32802 bool complement_op2_p)
32806 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32807 if (op2 && CONST_INT_P (op2)
32808 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32809 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32811 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32812 HOST_WIDE_INT value = INTVAL (op2) & mask;
32814 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32819 emit_insn (gen_rtx_SET (dest, const0_rtx));
32823 else if (value == mask)
32825 if (!rtx_equal_p (dest, op1))
32826 emit_insn (gen_rtx_SET (dest, op1));
32831 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
32832 into separate ORI/ORIS or XORI/XORIS instrucitons. */
32833 else if (code == IOR || code == XOR)
32837 if (!rtx_equal_p (dest, op1))
32838 emit_insn (gen_rtx_SET (dest, op1));
32844 if (code == AND && mode == SImode
32845 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32847 emit_insn (gen_andsi3 (dest, op1, op2));
32851 if (complement_op1_p)
32852 op1 = gen_rtx_NOT (mode, op1);
32854 if (complement_op2_p)
32855 op2 = gen_rtx_NOT (mode, op2);
32857 /* For canonical RTL, if only one arm is inverted it is the first. */
32858 if (!complement_op1_p && complement_op2_p)
32859 std::swap (op1, op2);
32861 bool_rtx = ((code == NOT)
32862 ? gen_rtx_NOT (mode, op1)
32863 : gen_rtx_fmt_ee (code, mode, op1, op2));
32865 if (complement_final_p)
32866 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32868 emit_insn (gen_rtx_SET (dest, bool_rtx));
32871 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32872 operations are split immediately during RTL generation to allow for more
32873 optimizations of the AND/IOR/XOR.
32875 OPERANDS is an array containing the destination and two input operands.
32876 CODE is the base operation (AND, IOR, XOR, NOT).
32877 MODE is the machine mode.
32878 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32879 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32880 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32881 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32882 formation of the AND instructions. */
32885 rs6000_split_logical_di (rtx operands[3],
32886 enum rtx_code code,
32887 bool complement_final_p,
32888 bool complement_op1_p,
32889 bool complement_op2_p)
32891 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32892 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32893 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32894 enum hi_lo { hi = 0, lo = 1 };
32895 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32898 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32899 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32900 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32901 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32904 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32907 if (!CONST_INT_P (operands[2]))
32909 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32910 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32914 HOST_WIDE_INT value = INTVAL (operands[2]);
32915 HOST_WIDE_INT value_hi_lo[2];
32917 gcc_assert (!complement_final_p);
32918 gcc_assert (!complement_op1_p);
32919 gcc_assert (!complement_op2_p);
32921 value_hi_lo[hi] = value >> 32;
32922 value_hi_lo[lo] = value & lower_32bits;
32924 for (i = 0; i < 2; i++)
32926 HOST_WIDE_INT sub_value = value_hi_lo[i];
32928 if (sub_value & sign_bit)
32929 sub_value |= upper_32bits;
32931 op2_hi_lo[i] = GEN_INT (sub_value);
32933 /* If this is an AND instruction, check to see if we need to load
32934 the value in a register. */
32935 if (code == AND && sub_value != -1 && sub_value != 0
32936 && !and_operand (op2_hi_lo[i], SImode))
32937 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32942 for (i = 0; i < 2; i++)
32944 /* Split large IOR/XOR operations. */
32945 if ((code == IOR || code == XOR)
32946 && CONST_INT_P (op2_hi_lo[i])
32947 && !complement_final_p
32948 && !complement_op1_p
32949 && !complement_op2_p
32950 && !logical_const_operand (op2_hi_lo[i], SImode))
32952 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32953 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32954 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32955 rtx tmp = gen_reg_rtx (SImode);
32957 /* Make sure the constant is sign extended. */
32958 if ((hi_16bits & sign_bit) != 0)
32959 hi_16bits |= upper_32bits;
32961 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32962 code, SImode, false, false, false);
32964 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
32965 code, SImode, false, false, false);
32968 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
32969 code, SImode, complement_final_p,
32970 complement_op1_p, complement_op2_p);
32976 /* Split the insns that make up boolean operations operating on multiple GPR
32977 registers. The boolean MD patterns ensure that the inputs either are
32978 exactly the same as the output registers, or there is no overlap.
32980 OPERANDS is an array containing the destination and two input operands.
32981 CODE is the base operation (AND, IOR, XOR, NOT).
32982 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32983 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32984 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32987 rs6000_split_logical (rtx operands[3],
32988 enum rtx_code code,
32989 bool complement_final_p,
32990 bool complement_op1_p,
32991 bool complement_op2_p)
32993 machine_mode mode = GET_MODE (operands[0]);
32994 machine_mode sub_mode;
32996 int sub_size, regno0, regno1, nregs, i;
32998 /* If this is DImode, use the specialized version that can run before
32999 register allocation. */
33000 if (mode == DImode && !TARGET_POWERPC64)
33002 rs6000_split_logical_di (operands, code, complement_final_p,
33003 complement_op1_p, complement_op2_p);
33009 op2 = (code == NOT) ? NULL_RTX : operands[2];
33010 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33011 sub_size = GET_MODE_SIZE (sub_mode);
33012 regno0 = REGNO (op0);
33013 regno1 = REGNO (op1);
33015 gcc_assert (reload_completed);
33016 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33017 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33019 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33020 gcc_assert (nregs > 1);
33022 if (op2 && REG_P (op2))
33023 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33025 for (i = 0; i < nregs; i++)
33027 int offset = i * sub_size;
33028 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33029 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33030 rtx sub_op2 = ((code == NOT)
33032 : simplify_subreg (sub_mode, op2, mode, offset));
33034 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33035 complement_final_p, complement_op1_p,
33043 /* Return true if the peephole2 can combine a load involving a combination of
33044 an addis instruction and a load with an offset that can be fused together on
33048 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33049 rtx addis_value, /* addis value. */
33050 rtx target, /* target register that is loaded. */
33051 rtx mem) /* bottom part of the memory addr. */
33056 /* Validate arguments. */
33057 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33060 if (!base_reg_operand (target, GET_MODE (target)))
33063 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33066 /* Allow sign/zero extension. */
33067 if (GET_CODE (mem) == ZERO_EXTEND
33068 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33069 mem = XEXP (mem, 0);
33074 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33077 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33078 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33081 /* Validate that the register used to load the high value is either the
33082 register being loaded, or we can safely replace its use.
33084 This function is only called from the peephole2 pass and we assume that
33085 there are 2 instructions in the peephole (addis and load), so we want to
33086 check if the target register was not used in the memory address and the
33087 register to hold the addis result is dead after the peephole. */
33088 if (REGNO (addis_reg) != REGNO (target))
33090 if (reg_mentioned_p (target, mem))
33093 if (!peep2_reg_dead_p (2, addis_reg))
33096 /* If the target register being loaded is the stack pointer, we must
33097 avoid loading any other value into it, even temporarily. */
33098 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33102 base_reg = XEXP (addr, 0);
33103 return REGNO (addis_reg) == REGNO (base_reg);
33106 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33107 sequence. We adjust the addis register to use the target register. If the
33108 load sign extends, we adjust the code to do the zero extending load, and an
33109 explicit sign extension later since the fusion only covers zero extending
33113 operands[0] register set with addis (to be replaced with target)
33114 operands[1] value set via addis
33115 operands[2] target register being loaded
33116 operands[3] D-form memory reference using operands[0]. */
33119 expand_fusion_gpr_load (rtx *operands)
33121 rtx addis_value = operands[1];
33122 rtx target = operands[2];
33123 rtx orig_mem = operands[3];
33124 rtx new_addr, new_mem, orig_addr, offset;
33125 enum rtx_code plus_or_lo_sum;
33126 machine_mode target_mode = GET_MODE (target);
33127 machine_mode extend_mode = target_mode;
33128 machine_mode ptr_mode = Pmode;
33129 enum rtx_code extend = UNKNOWN;
33131 if (GET_CODE (orig_mem) == ZERO_EXTEND
33132 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33134 extend = GET_CODE (orig_mem);
33135 orig_mem = XEXP (orig_mem, 0);
33136 target_mode = GET_MODE (orig_mem);
33139 gcc_assert (MEM_P (orig_mem));
33141 orig_addr = XEXP (orig_mem, 0);
33142 plus_or_lo_sum = GET_CODE (orig_addr);
33143 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33145 offset = XEXP (orig_addr, 1);
33146 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33147 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33149 if (extend != UNKNOWN)
33150 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33152 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33153 UNSPEC_FUSION_GPR);
33154 emit_insn (gen_rtx_SET (target, new_mem));
33156 if (extend == SIGN_EXTEND)
33158 int sub_off = ((BYTES_BIG_ENDIAN)
33159 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33162 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33164 emit_insn (gen_rtx_SET (target,
33165 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33171 /* Emit the addis instruction that will be part of a fused instruction
33175 emit_fusion_addis (rtx target, rtx addis_value)
33178 const char *addis_str = NULL;
33180 /* Emit the addis instruction. */
33181 fuse_ops[0] = target;
33182 if (satisfies_constraint_L (addis_value))
33184 fuse_ops[1] = addis_value;
33185 addis_str = "lis %0,%v1";
33188 else if (GET_CODE (addis_value) == PLUS)
33190 rtx op0 = XEXP (addis_value, 0);
33191 rtx op1 = XEXP (addis_value, 1);
33193 if (REG_P (op0) && CONST_INT_P (op1)
33194 && satisfies_constraint_L (op1))
33198 addis_str = "addis %0,%1,%v2";
33202 else if (GET_CODE (addis_value) == HIGH)
33204 rtx value = XEXP (addis_value, 0);
33205 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33207 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33208 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33210 addis_str = "addis %0,%2,%1@toc@ha";
33212 else if (TARGET_XCOFF)
33213 addis_str = "addis %0,%1@u(%2)";
33216 gcc_unreachable ();
33219 else if (GET_CODE (value) == PLUS)
33221 rtx op0 = XEXP (value, 0);
33222 rtx op1 = XEXP (value, 1);
33224 if (GET_CODE (op0) == UNSPEC
33225 && XINT (op0, 1) == UNSPEC_TOCREL
33226 && CONST_INT_P (op1))
33228 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33229 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33232 addis_str = "addis %0,%2,%1+%3@toc@ha";
33234 else if (TARGET_XCOFF)
33235 addis_str = "addis %0,%1+%3@u(%2)";
33238 gcc_unreachable ();
33242 else if (satisfies_constraint_L (value))
33244 fuse_ops[1] = value;
33245 addis_str = "lis %0,%v1";
33248 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33250 fuse_ops[1] = value;
33251 addis_str = "lis %0,%1@ha";
33256 fatal_insn ("Could not generate addis value for fusion", addis_value);
33258 output_asm_insn (addis_str, fuse_ops);
33261 /* Emit a D-form load or store instruction that is the second instruction
33262 of a fusion sequence. */
33265 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
33268 char insn_template[80];
33270 fuse_ops[0] = load_reg;
33271 fuse_ops[1] = addis_reg;
33273 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
33275 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
33276 fuse_ops[2] = offset;
33277 output_asm_insn (insn_template, fuse_ops);
33280 else if (GET_CODE (offset) == UNSPEC
33281 && XINT (offset, 1) == UNSPEC_TOCREL)
33284 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
33286 else if (TARGET_XCOFF)
33287 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
33290 gcc_unreachable ();
33292 fuse_ops[2] = XVECEXP (offset, 0, 0);
33293 output_asm_insn (insn_template, fuse_ops);
33296 else if (GET_CODE (offset) == PLUS
33297 && GET_CODE (XEXP (offset, 0)) == UNSPEC
33298 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
33299 && CONST_INT_P (XEXP (offset, 1)))
33301 rtx tocrel_unspec = XEXP (offset, 0);
33303 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
33305 else if (TARGET_XCOFF)
33306 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
33309 gcc_unreachable ();
33311 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
33312 fuse_ops[3] = XEXP (offset, 1);
33313 output_asm_insn (insn_template, fuse_ops);
33316 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
33318 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
33320 fuse_ops[2] = offset;
33321 output_asm_insn (insn_template, fuse_ops);
33325 fatal_insn ("Unable to generate load/store offset for fusion", offset);
33330 /* Given an address, convert it into the addis and load offset parts. Addresses
33331 created during the peephole2 process look like:
33332 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
33333 (unspec [(...)] UNSPEC_TOCREL)) */
33336 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
33340 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
33342 hi = XEXP (addr, 0);
33343 lo = XEXP (addr, 1);
33346 gcc_unreachable ();
33352 /* Return a string to fuse an addis instruction with a gpr load to the same
33353 register that we loaded up the addis instruction. The address that is used
33354 is the logical address that was formed during peephole2:
33355 (lo_sum (high) (low-part))
33357 The code is complicated, so we call output_asm_insn directly, and just
33361 emit_fusion_gpr_load (rtx target, rtx mem)
33366 const char *load_str = NULL;
33369 if (GET_CODE (mem) == ZERO_EXTEND)
33370 mem = XEXP (mem, 0);
33372 gcc_assert (REG_P (target) && MEM_P (mem));
33374 addr = XEXP (mem, 0);
33375 fusion_split_address (addr, &addis_value, &load_offset);
33377 /* Now emit the load instruction to the same register. */
33378 mode = GET_MODE (mem);
33396 gcc_assert (TARGET_POWERPC64);
33401 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
33404 /* Emit the addis instruction. */
33405 emit_fusion_addis (target, addis_value);
33407 /* Emit the D-form load instruction. */
33408 emit_fusion_load (target, target, load_offset, load_str);
33414 #ifdef RS6000_GLIBC_ATOMIC_FENV
33415 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
33416 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
33419 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
33422 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
33424 if (!TARGET_HARD_FLOAT)
33426 #ifdef RS6000_GLIBC_ATOMIC_FENV
33427 if (atomic_hold_decl == NULL_TREE)
33430 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33431 get_identifier ("__atomic_feholdexcept"),
33432 build_function_type_list (void_type_node,
33433 double_ptr_type_node,
33435 TREE_PUBLIC (atomic_hold_decl) = 1;
33436 DECL_EXTERNAL (atomic_hold_decl) = 1;
33439 if (atomic_clear_decl == NULL_TREE)
33442 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33443 get_identifier ("__atomic_feclearexcept"),
33444 build_function_type_list (void_type_node,
33446 TREE_PUBLIC (atomic_clear_decl) = 1;
33447 DECL_EXTERNAL (atomic_clear_decl) = 1;
33450 tree const_double = build_qualified_type (double_type_node,
33452 tree const_double_ptr = build_pointer_type (const_double);
33453 if (atomic_update_decl == NULL_TREE)
33456 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33457 get_identifier ("__atomic_feupdateenv"),
33458 build_function_type_list (void_type_node,
33461 TREE_PUBLIC (atomic_update_decl) = 1;
33462 DECL_EXTERNAL (atomic_update_decl) = 1;
33465 tree fenv_var = create_tmp_var_raw (double_type_node);
33466 TREE_ADDRESSABLE (fenv_var) = 1;
33467 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
33469 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
33470 *clear = build_call_expr (atomic_clear_decl, 0);
33471 *update = build_call_expr (atomic_update_decl, 1,
33472 fold_convert (const_double_ptr, fenv_addr));
33477 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
33478 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
33479 tree call_mffs = build_call_expr (mffs, 0);
33481 /* Generates the equivalent of feholdexcept (&fenv_var)
33483 *fenv_var = __builtin_mffs ();
33485 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
33486 __builtin_mtfsf (0xff, fenv_hold); */
33488 /* Mask to clear everything except for the rounding modes and non-IEEE
33489 arithmetic flag. */
33490 const unsigned HOST_WIDE_INT hold_exception_mask =
33491 HOST_WIDE_INT_C (0xffffffff00000007);
33493 tree fenv_var = create_tmp_var_raw (double_type_node);
33495 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
33497 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
33498 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
33499 build_int_cst (uint64_type_node,
33500 hold_exception_mask));
33502 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33505 tree hold_mtfsf = build_call_expr (mtfsf, 2,
33506 build_int_cst (unsigned_type_node, 0xff),
33509 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
33511 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
33513 double fenv_clear = __builtin_mffs ();
33514 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
33515 __builtin_mtfsf (0xff, fenv_clear); */
33517 /* Mask to clear everything except for the rounding modes and non-IEEE
33518 arithmetic flag. */
33519 const unsigned HOST_WIDE_INT clear_exception_mask =
33520 HOST_WIDE_INT_C (0xffffffff00000000);
33522 tree fenv_clear = create_tmp_var_raw (double_type_node);
33524 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
33526 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
33527 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
33529 build_int_cst (uint64_type_node,
33530 clear_exception_mask));
33532 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33533 fenv_clear_llu_and);
33535 tree clear_mtfsf = build_call_expr (mtfsf, 2,
33536 build_int_cst (unsigned_type_node, 0xff),
33539 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
33541 /* Generates the equivalent of feupdateenv (&fenv_var)
33543 double old_fenv = __builtin_mffs ();
33544 double fenv_update;
33545 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
33546 (*(uint64_t*)fenv_var 0x1ff80fff);
33547 __builtin_mtfsf (0xff, fenv_update); */
33549 const unsigned HOST_WIDE_INT update_exception_mask =
33550 HOST_WIDE_INT_C (0xffffffff1fffff00);
33551 const unsigned HOST_WIDE_INT new_exception_mask =
33552 HOST_WIDE_INT_C (0x1ff80fff);
33554 tree old_fenv = create_tmp_var_raw (double_type_node);
33555 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
33557 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
33558 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
33559 build_int_cst (uint64_type_node,
33560 update_exception_mask));
33562 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
33563 build_int_cst (uint64_type_node,
33564 new_exception_mask));
33566 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
33567 old_llu_and, new_llu_and);
33569 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33572 tree update_mtfsf = build_call_expr (mtfsf, 2,
33573 build_int_cst (unsigned_type_node, 0xff),
33574 fenv_update_mtfsf);
33576 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
33580 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
33582 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33584 rtx_tmp0 = gen_reg_rtx (V2DFmode);
33585 rtx_tmp1 = gen_reg_rtx (V2DFmode);
33587 /* The destination of the vmrgew instruction layout is:
33588 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
33589 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
33590 vmrgew instruction will be correct. */
33591 if (BYTES_BIG_ENDIAN)
33593 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
33595 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
33600 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
33601 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
33604 rtx_tmp2 = gen_reg_rtx (V4SFmode);
33605 rtx_tmp3 = gen_reg_rtx (V4SFmode);
33607 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
33608 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
33610 if (BYTES_BIG_ENDIAN)
33611 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
33613 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
33617 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
33619 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33621 rtx_tmp0 = gen_reg_rtx (V2DImode);
33622 rtx_tmp1 = gen_reg_rtx (V2DImode);
33624 /* The destination of the vmrgew instruction layout is:
33625 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
33626 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
33627 vmrgew instruction will be correct. */
33628 if (BYTES_BIG_ENDIAN)
33630 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
33631 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
33635 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
33636 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
33639 rtx_tmp2 = gen_reg_rtx (V4SFmode);
33640 rtx_tmp3 = gen_reg_rtx (V4SFmode);
33642 if (signed_convert)
33644 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
33645 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
33649 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
33650 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
33653 if (BYTES_BIG_ENDIAN)
33654 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
33656 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
33660 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
33663 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33665 rtx_tmp0 = gen_reg_rtx (V2DFmode);
33666 rtx_tmp1 = gen_reg_rtx (V2DFmode);
33668 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
33669 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
33671 rtx_tmp2 = gen_reg_rtx (V4SImode);
33672 rtx_tmp3 = gen_reg_rtx (V4SImode);
33674 if (signed_convert)
33676 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
33677 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
33681 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
33682 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
33685 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
33688 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
33691 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
33692 optimization_type opt_type)
33697 return (opt_type == OPTIMIZE_FOR_SPEED
33698 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
33705 /* Implement TARGET_CONSTANT_ALIGNMENT. */
33707 static HOST_WIDE_INT
33708 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33710 if (TREE_CODE (exp) == STRING_CST
33711 && (STRICT_ALIGNMENT || !optimize_size))
33712 return MAX (align, BITS_PER_WORD);
33716 /* Implement TARGET_STARTING_FRAME_OFFSET. */
33718 static HOST_WIDE_INT
33719 rs6000_starting_frame_offset (void)
33721 if (FRAME_GROWS_DOWNWARD)
33723 return RS6000_STARTING_FRAME_OFFSET;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* Only C++ mangled names (starting "_Z") in units that pass IEEE 128-bit
     values can need the compatibility alias.  */
  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      /* Temporarily re-mangle with the GCC 8.1 scheme to recover the old
	 symbol name, then restore the current assembler name.  */
      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
33760 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
33761 function names from <foo>l to <foo>f128 if the default long double type is
33762 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
33763 include file switches the names on systems that support long double as IEEE
33764 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
33765 In the future, glibc will export names like __ieee128_sinf128 and we can
33766 switch to using those instead of using sinf128, which pollutes the user's
33769 This will switch the names for Fortran math functions as well (which doesn't
33770 use math.h). However, Fortran needs other changes to the compiler and
33771 library before you can switch the real*16 type at compile time.
33773 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
33774 only do this if the default is that long double is IBM extended double, and
33775 the user asked for IEEE 128-bit. */
33778 rs6000_mangle_decl_assembler_name (tree decl, tree id)
33780 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
33781 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
33783 size_t len = IDENTIFIER_LENGTH (id);
33784 const char *name = IDENTIFIER_POINTER (id);
33786 if (name[len - 1] == 'l')
33788 bool uses_ieee128_p = false;
33789 tree type = TREE_TYPE (decl);
33790 machine_mode ret_mode = TYPE_MODE (type);
33792 /* See if the function returns a IEEE 128-bit floating point type or
33794 if (ret_mode == TFmode || ret_mode == TCmode)
33795 uses_ieee128_p = true;
33798 function_args_iterator args_iter;
33801 /* See if the function passes a IEEE 128-bit floating point type
33802 or complex type. */
33803 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
33805 machine_mode arg_mode = TYPE_MODE (arg);
33806 if (arg_mode == TFmode || arg_mode == TCmode)
33808 uses_ieee128_p = true;
33814 /* If we passed or returned an IEEE 128-bit floating point type,
33815 change the name. */
33816 if (uses_ieee128_p)
33818 char *name2 = (char *) alloca (len + 4);
33819 memcpy (name2, name, len - 1);
33820 strcpy (name2 + len - 1, "f128");
33821 id = get_identifier (name2);
33829 /* Predict whether the given loop in gimple will be transformed in the RTL
33830 doloop_optimize pass. */
33833 rs6000_predict_doloop_p (struct loop *loop)
33837 /* On rs6000, targetm.can_use_doloop_p is actually
33838 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
33839 if (loop->inner != NULL)
33841 if (dump_file && (dump_flags & TDF_DETAILS))
33842 fprintf (dump_file, "Predict doloop failure due to"
33843 " loop nesting.\n");
33850 struct gcc_target targetm = TARGET_INITIALIZER;
33852 #include "gt-rs6000.h"